# luxx/tools/builtin/crawler.py

"""Crawler tools - all exception handling in decorator"""
from luxx.tools.factory import tool
from luxx.tools.services import SearchService, FetchService
# Service instances (SearchService.search() is static-method style; no shared instance needed)
_fetch_service = FetchService()
@tool(name="web_search", description="Search the internet. Use when you need to find latest news or answer questions.", parameters={
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search keywords"},
"max_results": {"type": "integer", "description": "Number of results, default 5", "default": 5}
},
"required": ["query"]
}, required_params=["query"], category="crawler")
def web_search(arguments: dict):
"""
Search the web using DuckDuckGo
Returns:
{"query": str, "count": int, "results": list}
"""
query = arguments["query"]
max_results = arguments.get("max_results", 5)
results = SearchService().search(query, max_results)
return {
"query": query,
"count": len(results),
"results": results
}
@tool(name="web_fetch", description="Fetch content from a webpage.", parameters={
"type": "object",
"properties": {
"url": {"type": "string", "description": "URL to fetch"}
},
"required": ["url"]
}, required_params=["url"], category="crawler")
def web_fetch(arguments: dict):
"""
Fetch webpage content
Returns:
{"url": str, "title": str, "text": str}
"""
url = arguments["url"]
return _fetch_service.fetch(url)
@tool(name="batch_fetch", description="Batch fetch multiple webpages.", parameters={
"type": "object",
"properties": {
"urls": {"type": "array", "items": {"type": "string"}, "description": "URLs to fetch"}
},
"required": ["urls"]
}, required_params=["urls"], category="crawler")
def batch_fetch(arguments: dict):
"""
Batch fetch multiple webpages
Returns:
{"count": int, "results": list}
"""
urls = arguments.get("urls", [])
results = _fetch_service.fetch_batch(urls)
return {
"count": len(results),
"results": results
}