52 lines
2.0 KiB
Python
52 lines
2.0 KiB
Python
"""Crawler tools"""
|
|
from luxx.tools.factory import tool
|
|
from luxx.tools.services import SearchService, FetchService
|
|
|
|
|
|
@tool(
    name="web_search",
    description="Search the internet. Use when you need to find latest news or answer questions.",
    parameters={
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "Search keywords"},
            "max_results": {
                "type": "integer",
                "description": "Number of results, default 5",
                "default": 5,
            },
        },
        "required": ["query"],
    },
    category="crawler",
)
def web_search(arguments: dict) -> dict:
    """Run a web search and return the hits under a ``results`` key.

    Args:
        arguments: Tool-call payload. ``query`` is required; ``max_results``
            is optional and falls back to 5 when absent or ``None``.

    Returns:
        ``{"results": [...]}`` holding whatever ``SearchService().search``
        returned (an empty list when it returned a falsy value), or
        ``{"error": "Query is required"}`` when ``query`` is missing or empty.
    """
    query = arguments.get("query")
    if not query:
        # Report a missing required argument as an error dict, the same way
        # web_fetch and batch_fetch do, instead of raising KeyError.
        return {"error": "Query is required"}

    max_results = arguments.get("max_results")
    if max_results is None:
        # The schema default is only advisory: the key may be absent or
        # explicitly null, and both cases should mean "use the default".
        max_results = 5

    results = SearchService().search(query, max_results)
    # Normalise a falsy service result (None, empty) to an empty list.
    return {"results": results or []}
|
|
|
|
|
|
@tool(
    name="web_fetch",
    description="Fetch content from a webpage.",
    parameters={
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "URL to fetch"},
            "extract_type": {
                "type": "string",
                "enum": ["text", "links", "structured"],
                "default": "text",
            },
        },
        "required": ["url"],
    },
    category="crawler",
)
def web_fetch(arguments: dict) -> dict:
    """Fetch a single page through ``FetchService``.

    Args:
        arguments: Tool-call payload. ``url`` is required; ``extract_type``
            is optional and defaults to ``"text"``.

    Returns:
        ``{"error": "URL is required"}`` when ``url`` is missing or empty.
        Otherwise, when the service result contains an ``"error"`` key, a
        dict holding only that error; in every other case the service result
        is returned unchanged.
    """
    page_url = arguments.get("url")
    if not page_url:
        return {"error": "URL is required"}

    fetcher = FetchService()
    outcome = fetcher.fetch(page_url, arguments.get("extract_type", "text"))

    # On failure, surface only the error message and drop any other keys.
    return {"error": outcome["error"]} if "error" in outcome else outcome
|
|
|
|
|
|
@tool(
    name="batch_fetch",
    description="Batch fetch multiple webpages.",
    parameters={
        "type": "object",
        "properties": {
            "urls": {
                "type": "array",
                "items": {"type": "string"},
                "description": "URLs to fetch",
            },
            "extract_type": {
                "type": "string",
                "enum": ["text", "links", "structured"],
                "default": "text",
            },
        },
        "required": ["urls"],
    },
    category="crawler",
)
def batch_fetch(arguments: dict) -> dict:
    """Fetch several pages in one call through ``FetchService.fetch_batch``.

    Args:
        arguments: Tool-call payload. ``urls`` is required; ``extract_type``
            is optional and defaults to ``"text"``.

    Returns:
        ``{"error": ...}`` when ``urls`` is missing/empty or has more than
        10 entries; otherwise ``{"results": ..., "total": ...}`` where
        ``total`` is the length of the value returned by the service.
    """
    targets = arguments.get("urls", [])

    # Guard clauses: reject an empty request, then an oversized one.
    if not targets:
        return {"error": "URLs list is required"}
    if len(targets) > 10:
        return {"error": "Maximum 10 pages allowed"}

    fetcher = FetchService()
    pages = fetcher.fetch_batch(targets, arguments.get("extract_type", "text"))
    return {"results": pages, "total": len(pages)}
|