feat: 初步搭建 multi-agent 框架
This commit is contained in:
parent
41a4d997fd
commit
7bd19a7529
|
|
@ -17,6 +17,10 @@ def register_routes(app: Flask):
|
|||
client = LLMClient(MODEL_CONFIG)
|
||||
init_chat_service(client)
|
||||
|
||||
# Register LLM client in service locator so tools (e.g. agent_task) can access it
|
||||
from backend.tools import register_service
|
||||
register_service("llm_client", client)
|
||||
|
||||
# Initialize authentication system (reads auth_mode from config.yml)
|
||||
init_auth(app)
|
||||
|
||||
|
|
|
|||
|
|
@ -56,11 +56,11 @@ class ChatService:
|
|||
executor = ToolExecutor(registry=registry)
|
||||
|
||||
# Build context for tool execution
|
||||
context = None
|
||||
context = {"model": conv_model}
|
||||
if project_id:
|
||||
context = {"project_id": project_id}
|
||||
context["project_id"] = project_id
|
||||
elif conv.project_id:
|
||||
context = {"project_id": conv.project_id}
|
||||
context["project_id"] = conv.project_id
|
||||
|
||||
def generate():
|
||||
messages = list(initial_messages)
|
||||
|
|
@ -178,10 +178,8 @@ class ChatService:
|
|||
if tool_calls_list:
|
||||
all_tool_calls.extend(tool_calls_list)
|
||||
|
||||
# Execute each tool call, emit tool_call + tool_result as paired steps
|
||||
tool_results = []
|
||||
# Phase 1: emit all tool_call steps (before execution)
|
||||
for tc in tool_calls_list:
|
||||
# Emit tool_call step (before execution)
|
||||
call_step = {
|
||||
'id': f'step-{step_index}',
|
||||
'index': step_index,
|
||||
|
|
@ -194,17 +192,24 @@ class ChatService:
|
|||
yield f"event: process_step\ndata: {json.dumps(call_step, ensure_ascii=False)}\n\n"
|
||||
step_index += 1
|
||||
|
||||
# Execute the tool
|
||||
# Phase 2: execute tools — parallel when multiple, sequential when single
|
||||
if len(tool_calls_list) > 1:
|
||||
with app.app_context():
|
||||
single_result = executor.process_tool_calls([tc], context)
|
||||
tool_results.extend(single_result)
|
||||
tool_results = executor.process_tool_calls_parallel(
|
||||
tool_calls_list, context, max_workers=4
|
||||
)
|
||||
else:
|
||||
with app.app_context():
|
||||
tool_results = executor.process_tool_calls(
|
||||
tool_calls_list, context
|
||||
)
|
||||
|
||||
# Emit tool_result step (after execution)
|
||||
tr = single_result[0]
|
||||
# Phase 3: emit all tool_result steps (after execution, same order)
|
||||
for tr in tool_results:
|
||||
try:
|
||||
result_content = json.loads(tr["content"])
|
||||
skipped = result_content.get("skipped", False)
|
||||
except:
|
||||
except Exception:
|
||||
skipped = False
|
||||
result_step = {
|
||||
'id': f'step-{step_index}',
|
||||
|
|
|
|||
|
|
@ -20,13 +20,29 @@ from backend.tools.factory import tool, register_tool
|
|||
from backend.tools.executor import ToolExecutor
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Service locator – allows tools (e.g. agent_task) to access LLM client
# ---------------------------------------------------------------------------

# Module-level mapping of service name -> shared service object.
_services: dict = {}


def register_service(name: str, service) -> None:
    """Store *service* under *name* so tools can look it up later."""
    _services[name] = service


def get_service(name: str):
    """Return the service registered under *name*, or None if none exists."""
    return _services.get(name)
|
||||
|
||||
|
||||
def init_tools() -> None:
    """
    Initialize all built-in tools.

    Importing the builtin modules triggers each module's @tool decorators at
    import time, which registers every decorator-defined tool in the global
    registry. The imported names themselves are unused (hence the noqa: F401).
    """
    from backend.tools.builtin import code, crawler, data, weather, file_ops, agent  # noqa: F401
|
||||
|
||||
|
||||
# Public API exports
|
||||
|
|
@ -43,4 +59,7 @@ __all__ = [
|
|||
"register_tool",
|
||||
# Initialization
|
||||
"init_tools",
|
||||
# Service locator
|
||||
"register_service",
|
||||
"get_service",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -4,3 +4,4 @@ from backend.tools.builtin.crawler import *
|
|||
from backend.tools.builtin.data import *
|
||||
from backend.tools.builtin.file_ops import *
|
||||
from backend.tools.builtin.weather import *
|
||||
from backend.tools.builtin.agent import *
|
||||
|
|
|
|||
|
|
@ -0,0 +1,348 @@
|
|||
"""Multi-agent tools for concurrent and batch task execution.
|
||||
|
||||
Provides:
|
||||
- parallel_execute: Run multiple tool calls concurrently
|
||||
- agent_task: Spawn sub-agents with their own LLM conversation loops
|
||||
"""
|
||||
import json
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from backend.tools.factory import tool
|
||||
from backend.tools.core import registry
|
||||
from backend.tools.executor import ToolExecutor
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parallel_execute – run multiple tool calls concurrently
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@tool(
    name="parallel_execute",
    description=(
        "Execute multiple tool calls concurrently for better performance. "
        "Use when you have several independent operations that don't depend on each other "
        "(e.g. reading multiple files, running multiple searches, fetching several pages). "
        "Results are returned in the same order as the input."
    ),
    parameters={
        "type": "object",
        "properties": {
            "tool_calls": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {
                            "type": "string",
                            "description": "Tool name to execute",
                        },
                        "arguments": {
                            "type": "object",
                            "description": "Arguments for the tool",
                        },
                    },
                    "required": ["name", "arguments"],
                },
                "description": "List of tool calls to execute in parallel (max 10)",
            },
            "concurrency": {
                "type": "integer",
                "description": "Max concurrent executions (1-5, default 3)",
                "default": 3,
            },
        },
        "required": ["tool_calls"],
    },
    category="agent",
)
def parallel_execute(arguments: dict) -> dict:
    """Execute multiple tool calls concurrently.

    Args:
        arguments: {
            "tool_calls": [
                {"name": "file_read", "arguments": {"path": "a.py"}},
                {"name": "web_search", "arguments": {"query": "python"}}
            ],
            "concurrency": 3,
            "_project_id": "..." // injected by executor
        }

    Returns:
        {"results": [{index, tool_name, success, data/error}]}
    """
    tool_calls = arguments["tool_calls"]
    # Clamp requested concurrency into the documented 1-5 range.
    concurrency = min(max(arguments.get("concurrency", 3), 1), 5)

    if len(tool_calls) > 10:
        return {"success": False, "error": "Maximum 10 tool calls allowed per parallel execution"}

    # Build executor context from injected fields
    context = {}
    project_id = arguments.get("_project_id")
    if project_id:
        context["project_id"] = project_id

    # Format tool_calls into executor-compatible format
    executor_calls = []
    for i, tc in enumerate(tool_calls):
        executor_calls.append({
            "id": f"pe-{i}",
            "type": "function",
            "function": {
                "name": tc["name"],
                "arguments": json.dumps(tc["arguments"], ensure_ascii=False),
            },
        })

    # Use ToolExecutor for proper context injection, caching and dedup.
    # Caching is disabled: nested parallel batches should not share entries
    # with the outer conversation-level executor.
    executor = ToolExecutor(registry=registry, enable_cache=False)
    executor_results = executor.process_tool_calls_parallel(
        executor_calls, context, max_workers=concurrency
    )

    # Format output (executor preserves input order, so enumerate is the index)
    results = []
    for idx, er in enumerate(executor_results):
        try:
            content = json.loads(er["content"]) if isinstance(er["content"], str) else er["content"]
        except (json.JSONDecodeError, TypeError):
            content = {"success": False, "error": "Failed to parse result"}
        # BUGFIX: a tool result may legally parse to a non-dict JSON value
        # (list/str/number); spreading it with ** would raise TypeError.
        if not isinstance(content, dict):
            content = {"success": True, "data": content}
        results.append({
            "index": idx,
            "tool_name": er["name"],
            **content,
        })

    return {
        "success": True,
        "results": results,
        "total": len(results),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# agent_task – spawn sub-agents with independent LLM conversation loops
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _run_sub_agent(
    task_name: str,
    instruction: str,
    tool_names: Optional[List[str]],
    model: str,
    max_tokens: int,
    project_id: Optional[str],
    app: Any,
    max_iterations: int = 3,
) -> dict:
    """Run a single sub-agent with its own agentic loop.

    Each sub-agent gets its own ToolExecutor instance and runs a simplified
    version of the main agent loop, limited to *max_iterations* LLM round-trips
    to prevent runaway cost.

    Args:
        task_name: Short identifier echoed back in the result.
        instruction: System prompt describing the sub-agent's task.
        tool_names: Whitelist of tool names; falsy means all registered tools.
        model: LLM model identifier.
        max_tokens: Completion token budget (capped at 4096).
        project_id: Optional project scope injected into tool context.
        app: Flask app object; LLM calls and tool execution run inside its
            application context.
        max_iterations: Maximum LLM round-trips before giving up.

    Returns:
        {"task_name", "success", "response"} on success,
        {"task_name", "success": False, "error"} on failure.
    """
    # Local import avoids a circular import at module load time.
    from backend.tools import get_service

    llm_client = get_service("llm_client")
    if not llm_client:
        return {
            "task_name": task_name,
            "success": False,
            "error": "LLM client not available",
        }

    # Build tool list – filter to requested tools or use all
    all_tools = registry.list_all()
    if tool_names:
        allowed = set(tool_names)
        tools = [t for t in all_tools if t["function"]["name"] in allowed]
    else:
        tools = all_tools

    executor = ToolExecutor(registry=registry)
    context = {"project_id": project_id} if project_id else None

    # System prompt: instruction + reminder to give a final text answer
    system_msg = (
        f"{instruction}\n\n"
        "IMPORTANT: After gathering information via tools, you MUST provide a final "
        "text response with your analysis/answer. Do NOT end with only tool calls."
    )
    messages = [{"role": "system", "content": system_msg}]

    for _ in range(max_iterations):
        try:
            with app.app_context():
                resp = llm_client.call(
                    model=model,
                    messages=messages,
                    tools=tools if tools else None,
                    stream=False,
                    max_tokens=min(max_tokens, 4096),
                    temperature=0.7,
                    timeout=60,
                )

            if resp.status_code != 200:
                error_detail = resp.text[:500] if resp.text else f"HTTP {resp.status_code}"
                return {
                    "task_name": task_name,
                    "success": False,
                    "error": f"LLM API error: {error_detail}",
                }

            data = resp.json()
            choice = data["choices"][0]
            message = choice["message"]

            if message.get("tool_calls"):
                messages.append(message)
                tc_list = message["tool_calls"]
                # Convert OpenAI tool_calls to executor format
                executor_calls = []
                for tc in tc_list:
                    executor_calls.append({
                        "id": tc.get("id", ""),
                        "type": tc.get("type", "function"),
                        "function": {
                            "name": tc["function"]["name"],
                            "arguments": tc["function"]["arguments"],
                        },
                    })
                # BUGFIX: run tool execution inside the Flask app context,
                # matching how the main chat loop executes tools — file_* and
                # other tools may read app-bound state.
                with app.app_context():
                    tool_results = executor.process_tool_calls(executor_calls, context)
                messages.extend(tool_results)
            else:
                # Final text response — the sub-agent is done.
                return {
                    "task_name": task_name,
                    "success": True,
                    "response": message.get("content", ""),
                }

        except Exception as e:
            return {
                "task_name": task_name,
                "success": False,
                "error": str(e),
            }

    # Exhausted iterations without a final text response; report that as a
    # (soft) success so the caller still gets a structured result.
    return {
        "task_name": task_name,
        "success": True,
        "response": "Agent task completed but did not produce a final text response within the iteration limit.",
    }
|
||||
|
||||
|
||||
# @tool(
|
||||
# name="agent_task",
|
||||
# description=(
|
||||
# "Spawn one or more sub-agents to work on tasks concurrently. "
|
||||
# "Each agent runs its own independent conversation with the LLM and can use tools. "
|
||||
# "Useful for parallel research, multi-file analysis, or dividing complex tasks into sub-tasks. "
|
||||
# "Each agent is limited to 3 iterations and 4096 tokens to control cost."
|
||||
# ),
|
||||
# parameters={
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "tasks": {
|
||||
# "type": "array",
|
||||
# "items": {
|
||||
# "type": "object",
|
||||
# "properties": {
|
||||
# "name": {
|
||||
# "type": "string",
|
||||
# "description": "Short name/identifier for this task",
|
||||
# },
|
||||
# "instruction": {
|
||||
# "type": "string",
|
||||
# "description": "Detailed instruction for the sub-agent",
|
||||
# },
|
||||
# "tools": {
|
||||
# "type": "array",
|
||||
# "items": {"type": "string"},
|
||||
# "description": (
|
||||
# "Tool names this agent can use (empty = all tools). "
|
||||
# "e.g. ['file_read', 'file_list', 'web_search']"
|
||||
# ),
|
||||
# },
|
||||
# },
|
||||
# "required": ["name", "instruction"],
|
||||
# },
|
||||
# "description": "Tasks for parallel sub-agents (max 5)",
|
||||
# },
|
||||
# },
|
||||
# "required": ["tasks"],
|
||||
# },
|
||||
# category="agent",
|
||||
# )
|
||||
def agent_task(arguments: dict) -> dict:
    """Spawn sub-agents to work on tasks concurrently.

    Args:
        arguments: {
            "tasks": [
                {
                    "name": "research",
                    "instruction": "Research Python async patterns...",
                    "tools": ["web_search", "fetch_page"]
                },
                {
                    "name": "code_review",
                    "instruction": "Review code quality...",
                    "tools": ["file_read", "file_list"]
                }
            ]
        }
        "_model" and "_project_id" are injected by the executor's context
        injection (see ToolExecutor._inject_context).

    Returns:
        {"success": true, "results": [{task_name, success, response/error}]}
    """
    from flask import current_app

    tasks = arguments["tasks"]

    if len(tasks) > 5:
        return {"success": False, "error": "Maximum 5 concurrent agents allowed"}

    # BUGFIX: ThreadPoolExecutor(max_workers=0) raises ValueError — an empty
    # task list must short-circuit before the pool is created.
    if not tasks:
        return {"success": True, "results": [], "total": 0}

    # Get the real app object (not the proxy) so worker threads can push
    # their own app contexts.
    app = current_app._get_current_object()

    # Use injected model/project_id from executor context, fall back to defaults
    model = arguments.get("_model", "glm-5")
    project_id = arguments.get("_project_id")

    # Execute agents concurrently (max 3 at a time)
    concurrency = min(len(tasks), 3)
    results: List[Optional[dict]] = [None] * len(tasks)

    with ThreadPoolExecutor(max_workers=concurrency) as pool:
        futures = {
            pool.submit(
                _run_sub_agent,
                task["name"],
                task["instruction"],
                task.get("tools"),
                model,
                4096,  # per-agent token budget (cost control)
                project_id,
                app,
            ): i
            for i, task in enumerate(tasks)
        }
        for future in as_completed(futures):
            idx = futures[future]
            try:
                results[idx] = future.result()
            except Exception as e:
                # A crashed sub-agent becomes a structured failure entry
                # instead of taking the whole batch down.
                results[idx] = {
                    "task_name": tasks[idx]["name"],
                    "success": False,
                    "error": str(e),
                }

    return {
        "success": True,
        "results": results,
        "total": len(results),
    }
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
import json
|
||||
import time
|
||||
import hashlib
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import List, Dict, Optional, Any
|
||||
from backend.tools.core import ToolRegistry, registry
|
||||
|
||||
|
|
@ -54,6 +55,140 @@ class ToolExecutor:
|
|||
"""Clear call history (call this at start of new conversation turn)"""
|
||||
self._call_history.clear()
|
||||
|
||||
@staticmethod
|
||||
def _inject_context(name: str, args: dict, context: Optional[dict]) -> None:
|
||||
"""Inject context fields into tool arguments in-place.
|
||||
|
||||
- file_* tools: inject project_id
|
||||
- agent_task: inject model and project_id (prefixed with _ to avoid collisions)
|
||||
- parallel_execute: inject project_id (prefixed with _ to avoid collisions)
|
||||
"""
|
||||
if not context:
|
||||
return
|
||||
if name.startswith("file_") and "project_id" in context:
|
||||
args["project_id"] = context["project_id"]
|
||||
if name == "agent_task":
|
||||
if "model" in context:
|
||||
args["_model"] = context["model"]
|
||||
if "project_id" in context:
|
||||
args["_project_id"] = context["project_id"]
|
||||
if name == "parallel_execute":
|
||||
if "project_id" in context:
|
||||
args["_project_id"] = context["project_id"]
|
||||
|
||||
    def process_tool_calls_parallel(
        self,
        tool_calls: List[dict],
        context: Optional[dict] = None,
        max_workers: int = 4,
    ) -> List[dict]:
        """
        Process tool calls concurrently and return message list (ordered by input).

        Identical logic to process_tool_calls but uses ThreadPoolExecutor so that
        independent tool calls (e.g. reading 3 files, running 2 searches) execute
        in parallel instead of sequentially.

        Args:
            tool_calls: Tool call list returned by LLM
            context: Optional context info (user_id, project_id, etc.)
            max_workers: Maximum concurrent threads (1-6, default 4)

        Returns:
            Tool response message list in the same order as input tool_calls.
        """
        # Single (or empty) batch gains nothing from threads — reuse the
        # sequential path so both paths share one code trail.
        if len(tool_calls) <= 1:
            return self.process_tool_calls(tool_calls, context)

        # Clamp worker count into the documented 1-6 range.
        max_workers = min(max(max_workers, 1), 6)

        # Phase 1: prepare each call (parse args, inject context, check dedup/cache)
        # This phase is fast and sequential – it must be done before parallelism
        # to avoid race conditions on seen_calls / _call_history / _cache.
        # prepared[i] is either a finished result dict or an ("execute", ...) tuple.
        prepared: List[Optional[tuple]] = [None] * len(tool_calls)
        seen_calls: set = set()

        for i, call in enumerate(tool_calls):
            name = call["function"]["name"]
            args_str = call["function"]["arguments"]
            call_id = call["id"]

            # Parse JSON arguments (LLMs occasionally emit malformed JSON)
            try:
                args = json.loads(args_str) if isinstance(args_str, str) else args_str
            except json.JSONDecodeError:
                prepared[i] = self._create_error_result(call_id, name, "Invalid JSON arguments")
                continue

            # Inject context into tool arguments
            self._inject_context(name, args, context)

            # Dedup within same batch: identical name+args executes only once;
            # duplicates get a synthetic "duplicate" result.
            call_key = f"{name}:{json.dumps(args, sort_keys=True)}"
            if call_key in seen_calls:
                prepared[i] = self._create_tool_result(
                    call_id, name,
                    {"success": True, "data": None, "cached": True, "duplicate": True}
                )
                continue
            seen_calls.add(call_key)

            # History dedup: same call already executed earlier in this turn
            history_result = self._check_duplicate_in_history(name, args)
            if history_result is not None:
                prepared[i] = self._create_tool_result(call_id, name, {**history_result, "cached": True})
                continue

            # Cache check
            cache_key = self._make_cache_key(name, args)
            cached_result = self._get_cached(cache_key)
            if cached_result is not None:
                prepared[i] = self._create_tool_result(call_id, name, {**cached_result, "cached": True})
                continue

            # Mark as needing actual execution
            prepared[i] = ("execute", call_id, name, args, cache_key)

        # Separate pre-resolved results from tasks needing execution
        results: List[dict] = [None] * len(tool_calls)
        exec_tasks: Dict[int, tuple] = {}  # index -> (call_id, name, args, cache_key)

        for i, item in enumerate(prepared):
            if isinstance(item, dict):
                results[i] = item
            elif isinstance(item, tuple) and item[0] == "execute":
                _, call_id, name, args, cache_key = item
                exec_tasks[i] = (call_id, name, args, cache_key)

        # Phase 2: execute remaining calls in parallel
        if exec_tasks:
            def _run(idx: int, call_id: str, name: str, args: dict, cache_key: str) -> tuple:
                # Runs on a worker thread; returns (original index, result message)
                # so the caller can slot results back into input order.
                t0 = time.time()
                result = self._execute_tool(name, args)
                elapsed = time.time() - t0

                if result.get("success"):
                    self._set_cache(cache_key, result)

                # NOTE(review): concurrent appends to _call_history and cache
                # writes from worker threads — presumably safe under CPython's
                # GIL for list.append, but confirm if _set_cache does more than
                # a dict assignment.
                self._call_history.append({
                    "name": name,
                    "args_str": json.dumps(args, sort_keys=True, ensure_ascii=False),
                    "result": result,
                })

                return idx, self._create_tool_result(call_id, name, result, execution_time=elapsed)

            with ThreadPoolExecutor(max_workers=max_workers) as pool:
                futures = {
                    pool.submit(_run, idx, cid, n, a, ck): idx
                    for idx, (cid, n, a, ck) in exec_tasks.items()
                }
                for future in as_completed(futures):
                    # future.result() re-raises any exception from _run;
                    # _execute_tool is expected to catch tool errors itself.
                    idx, result_msg = future.result()
                    results[idx] = result_msg

        return results
|
||||
|
||||
def process_tool_calls(
|
||||
self,
|
||||
tool_calls: List[dict],
|
||||
|
|
@ -86,9 +221,7 @@ class ToolExecutor:
|
|||
continue
|
||||
|
||||
# Inject context into tool arguments
|
||||
if context:
|
||||
if name.startswith("file_") and "project_id" in context:
|
||||
args["project_id"] = context["project_id"]
|
||||
self._inject_context(name, args, context)
|
||||
|
||||
# Check for duplicate within same batch
|
||||
call_key = f"{name}:{json.dumps(args, sort_keys=True)}"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Tool helper services"""
|
||||
from typing import List
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from ddgs import DDGS
|
||||
import re
|
||||
|
||||
|
|
@ -119,19 +120,34 @@ class FetchService:
|
|||
max_concurrent: int = 5
|
||||
) -> List[dict]:
|
||||
"""
|
||||
Batch fetch pages
|
||||
Batch fetch pages concurrently.
|
||||
|
||||
Args:
|
||||
urls: URL list
|
||||
extract_type: Extract type
|
||||
max_concurrent: Max concurrent requests
|
||||
max_concurrent: Max concurrent requests (1-5, default 5)
|
||||
|
||||
Returns:
|
||||
Result list
|
||||
Result list (same order as input URLs)
|
||||
"""
|
||||
results = []
|
||||
for url in urls:
|
||||
results.append(self.fetch(url, extract_type))
|
||||
if len(urls) <= 1:
|
||||
return [self.fetch(url, extract_type) for url in urls]
|
||||
|
||||
max_concurrent = min(max(max_concurrent, 1), 5)
|
||||
results = [None] * len(urls)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=max_concurrent) as pool:
|
||||
futures = {
|
||||
pool.submit(self.fetch, url, extract_type): i
|
||||
for i, url in enumerate(urls)
|
||||
}
|
||||
for future in as_completed(futures):
|
||||
idx = futures[future]
|
||||
try:
|
||||
results[idx] = future.result()
|
||||
except Exception as e:
|
||||
results[idx] = {"error": str(e)}
|
||||
|
||||
return results
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue