"""Crawler related tools"""
from luxx.tools.factory import tool
from luxx.tools.services import SearchService, FetchService


@tool(
    name="web_search",
    description="Search the internet for information. Use when you need to find latest news or answer questions.",
    parameters={
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search keywords"
            },
            "max_results": {
                "type": "integer",
                "description": "Number of results to return, default 5",
                "default": 5
            }
        },
        "required": ["query"]
    },
    category="crawler"
)
def web_search(arguments: dict) -> dict:
    """
    Web search tool using DuckDuckGo

    (The backend is whatever ``SearchService`` uses; DuckDuckGo is stated by
    the original docstring, not by code visible in this module.)

    Args:
        arguments: Tool-call arguments. Reads ``query`` (required) and
            ``max_results`` (optional, defaults to 5).

    Returns:
        ``{"success": False, "error": ...}`` when ``query`` is missing or
        empty; otherwise ``{"success": True, "data": {"query", "results"}}``,
        with an extra ``"message"`` key when the service returned nothing.
    """
    # .get() so that a missing key yields a structured error, in line with
    # web_fetch/batch_fetch, instead of an uncaught KeyError.
    query = arguments.get("query")
    if not query:
        return {"success": False, "error": "Query is required"}

    max_results = arguments.get("max_results", 5)

    service = SearchService()
    results = service.search(query, max_results)

    if not results:
        return {
            "success": True,
            "data": {"query": query, "results": []},
            "message": "No results found",
        }

    return {"success": True, "data": {"query": query, "results": results}}


@tool(
    name="web_fetch",
    description="Fetch content from a webpage. Use when user needs detailed information from a page.",
    parameters={
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "URL of the webpage to fetch"
            },
            "extract_type": {
                "type": "string",
                "description": "Extraction type: text, links, or structured",
                "enum": ["text", "links", "structured"],
                "default": "text"
            }
        },
        "required": ["url"]
    },
    category="crawler"
)
def web_fetch(arguments: dict) -> dict:
    """
    Page fetch tool

    Args:
        arguments: Tool-call arguments. Reads ``url`` (required) and
            ``extract_type`` (optional, defaults to ``"text"``).

    Returns:
        ``{"success": False, "error": ...}`` when ``url`` is missing/empty or
        the fetch result contains an ``"error"`` key; otherwise
        ``{"success": True, "data": <fetch result>}``.
    """
    # .get() rather than [] so a missing key reaches the guard below instead
    # of raising KeyError before it.
    url = arguments.get("url")
    extract_type = arguments.get("extract_type", "text")

    if not url:
        return {"success": False, "error": "URL is required"}

    service = FetchService(timeout=15)
    result = service.fetch(url, extract_type)

    # The service signals failure through an "error" key in its result.
    if "error" in result:
        return {"success": False, "error": result["error"]}

    return {"success": True, "data": result}


@tool(
    name="batch_fetch",
    description="Batch fetch multiple webpages. Use when you need to get content from multiple pages.",
    parameters={
        "type": "object",
        "properties": {
            "urls": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of URLs to fetch"
            },
            "extract_type": {
                "type": "string",
                "enum": ["text", "links", "structured"],
                "default": "text"
            }
        },
        "required": ["urls"]
    },
    category="crawler"
)
def batch_fetch(arguments: dict) -> dict:
    """
    Batch fetch tool

    Args:
        arguments: Tool-call arguments. Reads ``urls`` (required, at most 10
            entries) and ``extract_type`` (optional, defaults to ``"text"``).

    Returns:
        ``{"success": False, "error": ...}`` when ``urls`` is missing/empty or
        has more than 10 entries; otherwise ``{"success": True, "data": ...}``
        where ``data`` holds ``results``, ``total`` and ``successful`` (the
        number of results without an ``"error"`` key).
    """
    # .get() rather than [] so a missing key reaches the guard below instead
    # of raising KeyError before it.
    urls = arguments.get("urls")
    extract_type = arguments.get("extract_type", "text")

    if not urls:
        return {"success": False, "error": "URLs list is required"}

    if len(urls) > 10:
        return {"success": False, "error": "Maximum 10 pages allowed"}

    service = FetchService(timeout=10)
    results = service.fetch_batch(urls, extract_type)

    # A per-page failure is reported via an "error" key in that page's result;
    # the batch call itself is still reported as successful.
    successful = sum(1 for r in results if "error" not in r)

    return {
        "success": True,
        "data": {
            "results": results,
            "total": len(results),
            "successful": successful
        }
    }