"""Crawler tools - all exception handling in decorator""" from luxx.tools.factory import tool from luxx.tools.services import SearchService, FetchService _fetch_service = FetchService() _search_service = SearchService() @tool(name="web_search", description="Search the internet. Use when you need to find latest news or answer questions.", parameters={ "type": "object", "properties": { "query": {"type": "string", "description": "Search keywords"}, "max_results": {"type": "integer", "description": "Number of results, default 5", "default": 5}, "region": {"type": "string", "description": "Search region (e.g. cn-zh for China, us-en for US)", "default": "cn-zh"} }, "required": ["query"] }, required_params=["query"], category="crawler") def web_search(arguments: dict): """ Search the web using DuckDuckGo Returns: {"query": str, "count": int, "results": list} """ query = arguments["query"] max_results = arguments.get("max_results", 5) region = arguments.get("region", "cn-zh") results = _search_service.search(query, max_results, region) return { "query": query, "count": len(results), "results": results } @tool(name="web_fetch", description="Fetch content from a webpage.", parameters={ "type": "object", "properties": { "url": {"type": "string", "description": "URL to fetch"}, "extract_type": {"type": "string", "description": "Extraction type: text, links, or structured", "enum": ["text", "links", "structured"], "default": "text"} }, "required": ["url"] }, required_params=["url"], category="crawler") def web_fetch(arguments: dict): """ Fetch webpage content Returns: {"url": str, "text/links/structured": ...} """ url = arguments["url"] extract_type = arguments.get("extract_type", "text") return _fetch_service.fetch(url, extract_type) @tool(name="batch_fetch", description="Batch fetch multiple webpages.", parameters={ "type": "object", "properties": { "urls": {"type": "array", "items": {"type": "string"}, "description": "URLs to fetch"}, "extract_type": {"type": "string", "description": "Extraction type: text, links, or structured", "enum": ["text", "links", "structured"], "default": "text"} }, "required": ["urls"] }, required_params=["urls"], category="crawler") def batch_fetch(arguments: dict): """ Batch fetch multiple webpages Returns: {"count": int, "results": list} """ urls = arguments.get("urls", []) extract_type = arguments.get("extract_type", "text") if len(urls) > 10: return {"error": "Maximum 10 pages can be fetched at once"} results = _fetch_service.fetch_batch(urls, extract_type) return { "count": len(results), "results": results }