# Luxx/luxx/tools/builtin/crawler.py
"""Crawler related tools"""
from luxx.tools.factory import tool
from luxx.tools.services import SearchService, FetchService
@tool(
    name="web_search",
    description="Search the internet for information. Use when you need to find latest news or answer questions.",
    parameters={
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search keywords"
            },
            "max_results": {
                "type": "integer",
                "description": "Number of results to return, default 5",
                "default": 5
            }
        },
        "required": ["query"]
    },
    category="crawler"
)
def web_search(arguments: dict) -> dict:
    """Search the web via DuckDuckGo.

    Args:
        arguments: Tool-call payload. Must contain "query" (str); may
            contain "max_results" (int, defaults to 5).

    Returns:
        dict with "success" bool; on success, "data" holds the query and
        a (possibly empty) list of results. Empty results additionally
        carry a "message". An empty query yields success=False.

    Raises:
        KeyError: if "query" is absent from arguments.
    """
    query = arguments["query"]
    # Validate required input explicitly, matching web_fetch/batch_fetch.
    if not query:
        return {"success": False, "error": "Query is required"}
    max_results = arguments.get("max_results", 5)
    # Guard against non-int or non-positive values from the caller;
    # fall back to the documented default rather than failing.
    if not isinstance(max_results, int) or max_results < 1:
        max_results = 5
    service = SearchService()
    results = service.search(query, max_results)
    if not results:
        return {"success": True, "data": {"query": query, "results": []}, "message": "No results found"}
    return {"success": True, "data": {"query": query, "results": results}}
@tool(
    name="web_fetch",
    description="Fetch content from a webpage. Use when user needs detailed information from a page.",
    parameters={
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "URL of the webpage to fetch"
            },
            "extract_type": {
                "type": "string",
                "description": "Extraction type: text, links, or structured",
                "enum": ["text", "links", "structured"],
                "default": "text"
            }
        },
        "required": ["url"]
    },
    category="crawler"
)
def web_fetch(arguments: dict) -> dict:
    """Fetch a single webpage and extract its content.

    Reads "url" (required) and "extract_type" (optional, default "text")
    from the arguments payload; delegates to FetchService and wraps the
    outcome in a success/error envelope.
    """
    target = arguments["url"]
    if not target:
        return {"success": False, "error": "URL is required"}
    mode = arguments.get("extract_type", "text")
    fetched = FetchService(timeout=15).fetch(target, mode)
    if "error" in fetched:
        return {"success": False, "error": fetched["error"]}
    return {"success": True, "data": fetched}
@tool(
    name="batch_fetch",
    description="Batch fetch multiple webpages. Use when you need to get content from multiple pages.",
    parameters={
        "type": "object",
        "properties": {
            "urls": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of URLs to fetch"
            },
            "extract_type": {
                "type": "string",
                "enum": ["text", "links", "structured"],
                "default": "text"
            }
        },
        "required": ["urls"]
    },
    category="crawler"
)
def batch_fetch(arguments: dict) -> dict:
    """Fetch up to ten webpages in one call.

    Reads "urls" (required, max 10 entries) and "extract_type"
    (optional, default "text"); returns all per-URL results plus
    total and successful counts.
    """
    url_list = arguments["urls"]
    if not url_list:
        return {"success": False, "error": "URLs list is required"}
    if len(url_list) > 10:
        return {"success": False, "error": "Maximum 10 pages allowed"}
    mode = arguments.get("extract_type", "text")
    fetched = FetchService(timeout=10).fetch_batch(url_list, mode)
    # A result dict without an "error" key counts as a success.
    ok_count = len([entry for entry in fetched if "error" not in entry])
    return {
        "success": True,
        "data": {
            "results": fetched,
            "total": len(fetched),
            "successful": ok_count
        }
    }