From 96f7518f430470c7921a503ffad395e4cc084a0a Mon Sep 17 00:00:00 2001 From: ViperEkura <3081035982@qq.com> Date: Sat, 18 Apr 2026 11:42:33 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E9=80=89=E6=8B=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- luxx/services/chat.py | 2 + luxx/tools/builtin/crawler.py | 7 ++- luxx/tools/services.py | 101 ++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + 4 files changed, 102 insertions(+), 9 deletions(-) diff --git a/luxx/services/chat.py b/luxx/services/chat.py index ccc9f5a..a8dc3cc 100644 --- a/luxx/services/chat.py +++ b/luxx/services/chat.py @@ -286,6 +286,7 @@ class ChatService: "completion_tokens": 0, "total_tokens": 0 } + actual_token_count = 0 # Streaming context for state management ctx = StreamContext() @@ -482,6 +483,7 @@ class ChatService: yield _sse_event("error", {"content": "Exceeded maximum tool call iterations"}) except Exception as e: + logger.error(f"Stream error: {e}") yield _sse_event("error", {"content": str(e)}) def _save_message( diff --git a/luxx/tools/builtin/crawler.py b/luxx/tools/builtin/crawler.py index 55bcc16..e8fa7fe 100644 --- a/luxx/tools/builtin/crawler.py +++ b/luxx/tools/builtin/crawler.py @@ -6,19 +6,18 @@ from luxx.tools.services import SearchService, FetchService _fetch_service = FetchService() _search_service = SearchService() - @tool(name="web_search", description="Search the internet. Use when you need to find latest news or answer questions.", parameters={ "type": "object", "properties": { "query": {"type": "string", "description": "Search keywords"}, "max_results": {"type": "integer", "description": "Number of results, default 5", "default": 5}, - "region": {"type": "string", "description": "Search region (e.g. cn-zh for China, us-en for US)", "default": "cn-zh"} + "region": {"type": "string", "description": "Search region (e.g. cn-zh for China, us-en for US)", "default": "cn-zh"}, }, "required": ["query"] }, required_params=["query"], category="crawler") def web_search(arguments: dict): """ - Search the web using DuckDuckGo + Search the web using DuckDuckGo or Bing Returns: {"query": str, "count": int, "results": list} @@ -26,6 +25,8 @@ def web_search(arguments: dict): query = arguments["query"] max_results = arguments.get("max_results", 5) region = arguments.get("region", "cn-zh") + + results = _search_service.search(query, max_results, region) return { diff --git a/luxx/tools/services.py b/luxx/tools/services.py index c350893..05806d6 100644 --- a/luxx/tools/services.py +++ b/luxx/tools/services.py @@ -1,17 +1,19 @@ """Tool helper services""" import re import httpx -from urllib.parse import parse_qs, urlparse, quote +from urllib.parse import quote from typing import List from concurrent.futures import ThreadPoolExecutor, as_completed from bs4 import BeautifulSoup from ddgs import DDGS +from curl_cffi import requests as curl_requests + class SearchService: - """Search service using DuckDuckGo""" + """Search service supporting multiple engines""" - def __init__(self, engine: str = "duckduckgo"): + def __init__(self, engine: str = "bing"): self.engine = engine def search( @@ -33,6 +35,8 @@ class SearchService: """ if self.engine == "duckduckgo": return self._search_duckduckgo(query, max_results, region) + elif self.engine == "bing": + return self._search_bing(query, max_results, region) else: raise ValueError(f"Unsupported search engine: {self.engine}") @@ -60,6 +64,85 @@ class SearchService: for r in results ] + def _search_bing( + self, + query: str, + max_results: int, + region: str + ) -> List[dict]: + """Bing search using curl-cffi to simulate browser""" + # Map region to Bing market code + market_map = { + "cn-zh": "zh-CN", + "us-en": "en-US", + } + market = market_map.get(region, "en-US") + + results = [] + offset = 0 + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": f"{market},en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + } + + while len(results) < max_results: + url = f"https://www.bing.com/search?q={quote(query)}&first={offset}&mkt={market}" + + try: + response = curl_requests.get( + url, + headers=headers, + impersonate="chrome", + timeout=15 + ) + + if response.status_code != 200: + break + + soup = BeautifulSoup(response.text, "html.parser") + + # Find search result items + for item in soup.select("li.b_algo"): + title_elem = item.select_one("h2 a") + snippet_elem = item.select_one("div.b_paractl") + cite_elem = item.select_one("cite") + + if title_elem: + title = title_elem.get_text(strip=True) + url = title_elem.get("href", "") + + # Get snippet + snippet = "" + if snippet_elem: + snippet = snippet_elem.get_text(strip=True) + elif cite_elem: + snippet = cite_elem.get_text(strip=True) + + results.append({ + "title": title, + "url": url, + "snippet": snippet + }) + + if len(results) >= max_results: + break + + # Check if there are more results + next_page = soup.select_one("a.sb_pagN") + if not next_page or len(results) >= max_results: + break + + offset += 10 + + except Exception as e: + break + + return results[:max_results] + class FetchService: """Page fetch service with content extraction support""" @@ -91,11 +174,17 @@ class FetchService: url = "https://" + url try: - resp = httpx.get( + resp = curl_requests.get( url, timeout=self.timeout, - follow_redirects=True, - headers={"User-Agent": self.user_agent} + impersonate="chrome", + headers={ + "User-Agent": self.user_agent, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + } ) resp.raise_for_status() except Exception as e: diff --git a/pyproject.toml b/pyproject.toml index e64e109..c2d7515 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "shortuuid>=1.0.11", "sse-starlette>=2.0.0", "ddgs>=5.0.0", + "curl-cffi>=0.6.0", ] [project.optional-dependencies]