diff --git a/backend/routes/__init__.py b/backend/routes/__init__.py
index 5996c11..4919ba9 100644
--- a/backend/routes/__init__.py
+++ b/backend/routes/__init__.py
@@ -17,7 +17,7 @@ def register_routes(app: Flask):
client = LLMClient(MODEL_CONFIG)
init_chat_service(client)
- # Register LLM client in service locator so tools (e.g. agent_task) can access it
+ # Register LLM client in service locator so tools (e.g. multi_agent) can access it
from backend.tools import register_service
register_service("llm_client", client)
diff --git a/backend/services/chat.py b/backend/services/chat.py
index 3d85987..d7b0d2e 100644
--- a/backend/services/chat.py
+++ b/backend/services/chat.py
@@ -61,6 +61,9 @@ class ChatService:
"""
conv_id = conv.id
conv_model = conv.model
+ conv_max_tokens = conv.max_tokens
+ conv_temperature = conv.temperature
+ conv_thinking_enabled = conv.thinking_enabled
app = current_app._get_current_object()
tools = registry.list_all() if tools_enabled else None
initial_messages = build_messages(conv, project_id)
@@ -85,7 +88,9 @@ class ChatService:
for iteration in range(MAX_ITERATIONS):
try:
stream_result = self._stream_llm_response(
- app, conv_id, messages, tools, tool_choice, step_index
+ app, messages, tools, tool_choice, step_index,
+ conv_model, conv_max_tokens, conv_temperature,
+ conv_thinking_enabled,
)
except requests.exceptions.HTTPError as e:
resp = e.response
@@ -185,7 +190,7 @@ class ChatService:
# Append assistant message + tool results for the next iteration
messages.append({
"role": "assistant",
- "content": full_content or None,
+ "content": full_content or "",
"tool_calls": tool_calls_list,
})
messages.extend(tool_results)
@@ -232,7 +237,8 @@ class ChatService:
# ------------------------------------------------------------------
def _stream_llm_response(
- self, app, conv_id, messages, tools, tool_choice, step_index
+ self, app, messages, tools, tool_choice, step_index,
+ model, max_tokens, temperature, thinking_enabled,
):
"""Call LLM streaming API and parse the response.
@@ -253,13 +259,12 @@ class ChatService:
sse_chunks = [] # Collect SSE events to yield later
with app.app_context():
- active_conv = db.session.get(Conversation, conv_id)
resp = self.llm.call(
- model=active_conv.model,
+ model=model,
messages=messages,
- max_tokens=active_conv.max_tokens,
- temperature=active_conv.temperature,
- thinking_enabled=active_conv.thinking_enabled,
+ max_tokens=max_tokens,
+ temperature=temperature,
+ thinking_enabled=thinking_enabled,
tools=tools,
tool_choice=tool_choice,
stream=True,
@@ -327,39 +332,8 @@ class ChatService:
sse_chunks,
)
- def _execute_tools_safe(self, app, executor, tool_calls_list, context):
- """Execute tool calls with top-level error wrapping.
-
- If an unexpected exception occurs during tool execution, it is
- converted into error tool results instead of crashing the stream.
- """
- try:
- if len(tool_calls_list) > 1:
- with app.app_context():
- tool_results = executor.process_tool_calls_parallel(
- tool_calls_list, context, max_workers=TOOL_MAX_WORKERS
- )
- else:
- with app.app_context():
- tool_results = executor.process_tool_calls(
- tool_calls_list, context
- )
- except Exception as e:
- logger.exception("Error during tool execution")
- tool_results = [
- {
- "role": "tool",
- "tool_call_id": tc["id"],
- "name": tc["function"]["name"],
- "content": json.dumps({
- "success": False,
- "error": f"Tool execution failed: {e}",
- }, ensure_ascii=False),
- }
- for tc in tool_calls_list
- ]
-
- # Truncate oversized tool result content
+ def _truncate_tool_results(self, tool_results):
+ """Truncate oversized tool result content in-place and return the list."""
for tr in tool_results:
if len(tr["content"]) > TOOL_RESULT_MAX_LENGTH:
try:
@@ -380,9 +354,45 @@ class ChatService:
ensure_ascii=False,
default=str,
)[:TOOL_RESULT_MAX_LENGTH]
-
return tool_results
+ def _execute_tools_safe(self, app, executor, tool_calls_list, context):
+ """Execute tool calls with top-level error wrapping.
+
+ If an unexpected exception occurs during tool execution, it is
+ converted into error tool results instead of crashing the stream.
+ """
+ try:
+ if len(tool_calls_list) > 1:
+ with app.app_context():
+ return self._truncate_tool_results(
+ executor.process_tool_calls_parallel(
+ tool_calls_list, context, max_workers=TOOL_MAX_WORKERS
+ )
+ )
+ else:
+ with app.app_context():
+ return self._truncate_tool_results(
+ executor.process_tool_calls(
+ tool_calls_list, context
+ )
+ )
+ except Exception as e:
+ logger.exception("Error during tool execution")
+ tool_results = [
+ {
+ "role": "tool",
+ "tool_call_id": tc["id"],
+ "name": tc["function"]["name"],
+ "content": json.dumps({
+ "success": False,
+ "error": f"Tool execution failed: {e}",
+ }, ensure_ascii=False),
+ }
+ for tc in tool_calls_list
+ ]
+ return self._truncate_tool_results(tool_results)
+
def _save_message(
self, app, conv_id, conv_model, msg_id,
full_content, all_tool_calls, all_tool_results,
diff --git a/backend/tools/__init__.py b/backend/tools/__init__.py
index 6b436d6..49433b6 100644
--- a/backend/tools/__init__.py
+++ b/backend/tools/__init__.py
@@ -21,7 +21,7 @@ from backend.tools.executor import ToolExecutor
# ---------------------------------------------------------------------------
-# Service locator – allows tools (e.g. agent_task) to access LLM client
+# Service locator – allows tools (e.g. multi_agent) to access LLM client
# ---------------------------------------------------------------------------
_services: dict = {}
diff --git a/backend/tools/builtin/agent.py b/backend/tools/builtin/agent.py
index fef021f..0b3f7a1 100644
--- a/backend/tools/builtin/agent.py
+++ b/backend/tools/builtin/agent.py
@@ -1,8 +1,7 @@
-"""Multi-agent tools for concurrent and batch task execution.
+"""Multi-agent tool for spawning concurrent sub-agents.
Provides:
-- parallel_execute: Run multiple tool calls concurrently
-- agent_task: Spawn sub-agents with their own LLM conversation loops
+- multi_agent: Spawn sub-agents with independent LLM conversation loops
"""
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -13,118 +12,36 @@ from backend.tools.core import registry
from backend.tools.executor import ToolExecutor
-# ---------------------------------------------------------------------------
-# parallel_execute – run multiple tool calls concurrently
-# ---------------------------------------------------------------------------
+def _to_executor_calls(tool_calls: list, id_prefix: str = "tc") -> list:
+ """Normalize tool calls into executor-compatible format.
-@tool(
- name="parallel_execute",
- description=(
- "Execute multiple tool calls concurrently for better performance. "
- "Use when you have several independent operations that don't depend on each other "
- "(e.g. reading multiple files, running multiple searches, fetching several pages). "
- "Results are returned in the same order as the input."
- ),
- parameters={
- "type": "object",
- "properties": {
- "tool_calls": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "name": {
- "type": "string",
- "description": "Tool name to execute",
- },
- "arguments": {
- "type": "object",
- "description": "Arguments for the tool",
- },
- },
- "required": ["name", "arguments"],
- },
- "description": "List of tool calls to execute in parallel (max 10)",
- },
- "concurrency": {
- "type": "integer",
- "description": "Max concurrent executions (1-5, default 3)",
- "default": 3,
- },
- },
- "required": ["tool_calls"],
- },
- category="agent",
-)
-def parallel_execute(arguments: dict) -> dict:
- """Execute multiple tool calls concurrently.
-
- Args:
- arguments: {
- "tool_calls": [
- {"name": "file_read", "arguments": {"path": "a.py"}},
- {"name": "web_search", "arguments": {"query": "python"}}
- ],
- "concurrency": 3,
- "_project_id": "..." // injected by executor
- }
-
- Returns:
- {"results": [{index, tool_name, success, data/error}]}
+ Accepts two input shapes:
+ - LLM format: {"function": {"name": ..., "arguments": ...}}
+ - Simple format: {"name": ..., "arguments": ...}
"""
- tool_calls = arguments["tool_calls"]
- concurrency = min(max(arguments.get("concurrency", 3), 1), 5)
-
- if len(tool_calls) > 10:
- return {"success": False, "error": "Maximum 10 tool calls allowed per parallel execution"}
-
- # Build executor context from injected fields
- context = {}
- project_id = arguments.get("_project_id")
- if project_id:
- context["project_id"] = project_id
-
- # Format tool_calls into executor-compatible format
executor_calls = []
for i, tc in enumerate(tool_calls):
- executor_calls.append({
- "id": f"pe-{i}",
- "type": "function",
- "function": {
- "name": tc["name"],
- "arguments": json.dumps(tc["arguments"], ensure_ascii=False),
- },
- })
+ if "function" in tc:
+ func = tc["function"]
+ executor_calls.append({
+ "id": tc.get("id", f"{id_prefix}-{i}"),
+ "type": tc.get("type", "function"),
+ "function": {
+ "name": func["name"],
+ "arguments": func["arguments"],
+ },
+ })
+ else:
+ executor_calls.append({
+ "id": f"{id_prefix}-{i}",
+ "type": "function",
+ "function": {
+ "name": tc["name"],
+ "arguments": json.dumps(tc["arguments"], ensure_ascii=False),
+ },
+ })
+ return executor_calls
- # Use ToolExecutor for proper context injection, caching and dedup
- executor = ToolExecutor(registry=registry, enable_cache=False)
- executor_results = executor.process_tool_calls_parallel(
- executor_calls, context, max_workers=concurrency
- )
-
- # Format output
- results = []
- for er in executor_results:
- try:
- content = json.loads(er["content"]) if isinstance(er["content"], str) else er["content"]
- except (json.JSONDecodeError, TypeError):
- content = {"success": False, "error": "Failed to parse result"}
- results.append({
- "index": len(results),
- "tool_name": er["name"],
- **content,
- })
-
- return {
- "success": True,
- "results": results,
- "total": len(results),
- }
-
-
-# ---------------------------------------------------------------------------
-# agent_task – spawn sub-agents with independent LLM conversation loops
-# ---------------------------------------------------------------------------
def _run_sub_agent(
task_name: str,
@@ -160,7 +77,9 @@ def _run_sub_agent(
tools = all_tools
executor = ToolExecutor(registry=registry)
- context = {"project_id": project_id} if project_id else None
+ context = {"model": model}
+ if project_id:
+ context["project_id"] = project_id
# System prompt: instruction + reminder to give a final text answer
system_msg = (
@@ -170,13 +89,17 @@ def _run_sub_agent(
)
messages = [{"role": "system", "content": system_msg}]
- for _ in range(max_iterations):
+ for i in range(max_iterations):
+ is_final = (i == max_iterations - 1)
try:
with app.app_context():
resp = llm_client.call(
model=model,
messages=messages,
- tools=tools if tools else None,
+ # On the last iteration, don't pass tools so the LLM is
+ # forced to produce a final text response instead of calling
+ # more tools.
+ tools=None if is_final else (tools if tools else None),
stream=False,
max_tokens=min(max_tokens, 4096),
temperature=0.7,
@@ -196,19 +119,15 @@ def _run_sub_agent(
message = choice["message"]
if message.get("tool_calls"):
- messages.append(message)
+ # Only extract needed fields — LLM response may contain extra
+ # fields (e.g. reasoning_content) that the API rejects on re-send
+ messages.append({
+ "role": "assistant",
+ "content": message.get("content") or "",
+ "tool_calls": message["tool_calls"],
+ })
tc_list = message["tool_calls"]
- # Convert OpenAI tool_calls to executor format
- executor_calls = []
- for tc in tc_list:
- executor_calls.append({
- "id": tc.get("id", ""),
- "type": tc.get("type", "function"),
- "function": {
- "name": tc["function"]["name"],
- "arguments": tc["function"]["arguments"],
- },
- })
+ executor_calls = _to_executor_calls(tc_list)
tool_results = executor.process_tool_calls(executor_calls, context)
messages.extend(tool_results)
else:
@@ -226,7 +145,7 @@ def _run_sub_agent(
"error": str(e),
}
- # Exhausted iterations without final response — return last LLM output if any
+ # Exhausted iterations without final response
return {
"task_name": task_name,
"success": True,
@@ -234,49 +153,49 @@ def _run_sub_agent(
}
-# @tool(
-# name="agent_task",
-# description=(
-# "Spawn one or more sub-agents to work on tasks concurrently. "
-# "Each agent runs its own independent conversation with the LLM and can use tools. "
-# "Useful for parallel research, multi-file analysis, or dividing complex tasks into sub-tasks. "
-# "Each agent is limited to 3 iterations and 4096 tokens to control cost."
-# ),
-# parameters={
-# "type": "object",
-# "properties": {
-# "tasks": {
-# "type": "array",
-# "items": {
-# "type": "object",
-# "properties": {
-# "name": {
-# "type": "string",
-# "description": "Short name/identifier for this task",
-# },
-# "instruction": {
-# "type": "string",
-# "description": "Detailed instruction for the sub-agent",
-# },
-# "tools": {
-# "type": "array",
-# "items": {"type": "string"},
-# "description": (
-# "Tool names this agent can use (empty = all tools). "
-# "e.g. ['file_read', 'file_list', 'web_search']"
-# ),
-# },
-# },
-# "required": ["name", "instruction"],
-# },
-# "description": "Tasks for parallel sub-agents (max 5)",
-# },
-# },
-# "required": ["tasks"],
-# },
-# category="agent",
-# )
-def agent_task(arguments: dict) -> dict:
+@tool(
+ name="multi_agent",
+ description=(
+ "Spawn multiple sub-agents to work on tasks concurrently. "
+ "Each agent runs its own independent conversation with the LLM and can use tools. "
+ "Useful for parallel research, multi-file analysis, or dividing complex tasks into sub-tasks. "
+ "Each agent is limited to 3 iterations and 4096 tokens to control cost."
+ ),
+ parameters={
+ "type": "object",
+ "properties": {
+ "tasks": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Short name/identifier for this task",
+ },
+ "instruction": {
+ "type": "string",
+ "description": "Detailed instruction for the sub-agent",
+ },
+ "tools": {
+ "type": "array",
+ "items": {"type": "string"},
+ "description": (
+ "Tool names this agent can use (empty = all tools). "
+ "e.g. ['file_read', 'file_list', 'web_search']"
+ ),
+ },
+ },
+ "required": ["name", "instruction"],
+ },
+ "description": "Tasks for parallel sub-agents (max 5)",
+ },
+ },
+ "required": ["tasks"],
+ },
+ category="agent",
+)
+def multi_agent(arguments: dict) -> dict:
"""Spawn sub-agents to work on tasks concurrently.
Args:
@@ -296,7 +215,7 @@ def agent_task(arguments: dict) -> dict:
}
Returns:
- {"success": true, "results": [{task_name, success, response/error}]}
+ {"success": true, "results": [{task_name, success, response/error}], "total": int}
"""
from flask import current_app
@@ -309,7 +228,8 @@ def agent_task(arguments: dict) -> dict:
app = current_app._get_current_object()
# Use injected model/project_id from executor context, fall back to defaults
- model = arguments.get("_model", "glm-5")
+ from backend.config import DEFAULT_MODEL
+ model = arguments.get("_model") or DEFAULT_MODEL
project_id = arguments.get("_project_id")
# Execute agents concurrently (max 3 at a time)
diff --git a/backend/tools/executor.py b/backend/tools/executor.py
index 6cdb9b8..926e156 100644
--- a/backend/tools/executor.py
+++ b/backend/tools/executor.py
@@ -56,21 +56,80 @@ class ToolExecutor:
"""Inject context fields into tool arguments in-place.
- file_* tools: inject project_id
- - agent_task: inject model and project_id (prefixed with _ to avoid collisions)
- - parallel_execute: inject project_id (prefixed with _ to avoid collisions)
+ - agent tools (multi_agent): inject _model and _project_id
"""
if not context:
return
if name.startswith("file_") and "project_id" in context:
args["project_id"] = context["project_id"]
- if name == "agent_task":
+ if name == "multi_agent":
if "model" in context:
args["_model"] = context["model"]
if "project_id" in context:
args["_project_id"] = context["project_id"]
- if name == "parallel_execute":
- if "project_id" in context:
- args["_project_id"] = context["project_id"]
+
+ def _prepare_call(
+ self,
+ call: dict,
+ context: Optional[dict],
+ seen_calls: set,
+ ) -> tuple:
+ """Parse, inject context, check dedup/cache for a single tool call.
+
+ Returns a tagged tuple:
+ ("error", call_id, name, error_msg)
+ ("cached", call_id, name, result_dict) -- dedup or cache hit
+ ("execute", call_id, name, args, cache_key)
+ """
+ name = call["function"]["name"]
+ args_str = call["function"]["arguments"]
+ call_id = call["id"]
+
+ # Parse JSON arguments
+ try:
+ args = json.loads(args_str) if isinstance(args_str, str) else args_str
+ except json.JSONDecodeError:
+ return ("error", call_id, name, "Invalid JSON arguments")
+
+ # Inject context
+ self._inject_context(name, args, context)
+
+ # Dedup within same batch
+ call_key = f"{name}:{json.dumps(args, sort_keys=True)}"
+ if call_key in seen_calls:
+ return ("cached", call_id, name,
+ {"success": True, "data": None, "cached": True, "duplicate": True})
+ seen_calls.add(call_key)
+
+ # History dedup
+ history_result = self._check_duplicate_in_history(name, args)
+ if history_result is not None:
+ return ("cached", call_id, name, {**history_result, "cached": True})
+
+ # Cache check
+ cache_key = self._make_cache_key(name, args)
+ cached_result = self._get_cached(cache_key)
+ if cached_result is not None:
+ return ("cached", call_id, name, {**cached_result, "cached": True})
+
+ return ("execute", call_id, name, args, cache_key)
+
+ def _execute_and_record(
+ self,
+ name: str,
+ args: dict,
+ cache_key: str,
+ ) -> dict:
+ """Execute a tool, cache result, record history, and return raw result dict."""
+ result = self._execute_tool(name, args)
+ if result.get("success"):
+ self._set_cache(cache_key, result)
+ self._call_history.append({
+ "name": name,
+ "args_str": json.dumps(args, sort_keys=True, ensure_ascii=False),
+ "result": result,
+ })
+ return result
def process_tool_calls_parallel(
self,
@@ -81,10 +140,6 @@ class ToolExecutor:
"""
Process tool calls concurrently and return message list (ordered by input).
- Identical logic to process_tool_calls but uses ThreadPoolExecutor so that
- independent tool calls (e.g. reading 3 files, running 2 searches) execute
- in parallel instead of sequentially.
-
Args:
tool_calls: Tool call list returned by LLM
context: Optional context info (user_id, project_id, etc.)
@@ -98,80 +153,31 @@ class ToolExecutor:
max_workers = min(max(max_workers, 1), 6)
- # Phase 1: prepare each call (parse args, inject context, check dedup/cache)
- # This phase is fast and sequential – it must be done before parallelism
- # to avoid race conditions on seen_calls / _call_history / _cache.
- prepared: List[Optional[tuple]] = [None] * len(tool_calls)
- seen_calls: set = set()
+ # Phase 1: prepare (sequential – avoids race conditions on shared state)
+        seen_calls: set = set(); prepared = [self._prepare_call(call, context, seen_calls) for call in tool_calls]
- for i, call in enumerate(tool_calls):
- name = call["function"]["name"]
- args_str = call["function"]["arguments"]
- call_id = call["id"]
-
- # Parse JSON arguments
- try:
- args = json.loads(args_str) if isinstance(args_str, str) else args_str
- except json.JSONDecodeError:
- prepared[i] = self._create_error_result(call_id, name, "Invalid JSON arguments")
- continue
-
- # Inject context into tool arguments
- self._inject_context(name, args, context)
-
- # Dedup within same batch
- call_key = f"{name}:{json.dumps(args, sort_keys=True)}"
- if call_key in seen_calls:
- prepared[i] = self._create_tool_result(
- call_id, name,
- {"success": True, "data": None, "cached": True, "duplicate": True}
- )
- continue
- seen_calls.add(call_key)
-
- # History dedup
- history_result = self._check_duplicate_in_history(name, args)
- if history_result is not None:
- prepared[i] = self._create_tool_result(call_id, name, {**history_result, "cached": True})
- continue
-
- # Cache check
- cache_key = self._make_cache_key(name, args)
- cached_result = self._get_cached(cache_key)
- if cached_result is not None:
- prepared[i] = self._create_tool_result(call_id, name, {**cached_result, "cached": True})
- continue
-
- # Mark as needing actual execution
- prepared[i] = ("execute", call_id, name, args, cache_key)
-
- # Separate pre-resolved results from tasks needing execution
+ # Phase 2: separate pre-resolved from tasks needing execution
results: List[dict] = [None] * len(tool_calls)
- exec_tasks: Dict[int, tuple] = {} # index -> (call_id, name, args, cache_key)
+ exec_tasks: Dict[int, tuple] = {}
for i, item in enumerate(prepared):
- if isinstance(item, dict):
- results[i] = item
- elif isinstance(item, tuple) and item[0] == "execute":
+ tag = item[0]
+ if tag == "error":
+ _, call_id, name, error_msg = item
+ results[i] = self._create_error_result(call_id, name, error_msg)
+ elif tag == "cached":
+ _, call_id, name, result_dict = item
+ results[i] = self._create_tool_result(call_id, name, result_dict)
+ else: # "execute"
_, call_id, name, args, cache_key = item
exec_tasks[i] = (call_id, name, args, cache_key)
- # Phase 2: execute remaining calls in parallel
+ # Phase 3: execute remaining calls in parallel
if exec_tasks:
def _run(idx: int, call_id: str, name: str, args: dict, cache_key: str) -> tuple:
t0 = time.time()
- result = self._execute_tool(name, args)
+ result = self._execute_and_record(name, args, cache_key)
elapsed = time.time() - t0
-
- if result.get("success"):
- self._set_cache(cache_key, result)
-
- self._call_history.append({
- "name": name,
- "args_str": json.dumps(args, sort_keys=True, ensure_ascii=False),
- "result": result,
- })
-
return idx, self._create_tool_result(call_id, name, result, execution_time=elapsed)
with ThreadPoolExecutor(max_workers=max_workers) as pool:
@@ -201,65 +207,22 @@ class ToolExecutor:
Tool response message list, can be appended to messages
"""
results = []
- seen_calls = set() # Track calls within this batch
+ seen_calls: set = set()
for call in tool_calls:
- name = call["function"]["name"]
- args_str = call["function"]["arguments"]
- call_id = call["id"]
+ prepared = self._prepare_call(call, context, seen_calls)
+ tag = prepared[0]
- try:
- args = json.loads(args_str) if isinstance(args_str, str) else args_str
- except json.JSONDecodeError:
- results.append(self._create_error_result(
- call_id, name, "Invalid JSON arguments"
- ))
- continue
-
- # Inject context into tool arguments
- self._inject_context(name, args, context)
-
- # Check for duplicate within same batch
- call_key = f"{name}:{json.dumps(args, sort_keys=True)}"
- if call_key in seen_calls:
- # Skip duplicate, but still return a result
- results.append(self._create_tool_result(
- call_id, name,
- {"success": True, "data": None, "cached": True, "duplicate": True}
- ))
- continue
- seen_calls.add(call_key)
-
- # Check history for previous call in this session
- history_result = self._check_duplicate_in_history(name, args)
- if history_result is not None:
- result = {**history_result, "cached": True}
+ if tag == "error":
+ _, call_id, name, error_msg = prepared
+ results.append(self._create_error_result(call_id, name, error_msg))
+ elif tag == "cached":
+ _, call_id, name, result_dict = prepared
+ results.append(self._create_tool_result(call_id, name, result_dict))
+ else: # "execute"
+ _, call_id, name, args, cache_key = prepared
+ result = self._execute_and_record(name, args, cache_key)
results.append(self._create_tool_result(call_id, name, result))
- continue
-
- # Check cache
- cache_key = self._make_cache_key(name, args)
- cached_result = self._get_cached(cache_key)
- if cached_result is not None:
- result = {**cached_result, "cached": True}
- results.append(self._create_tool_result(call_id, name, result))
- continue
-
- # Execute tool with retry
- result = self._execute_tool(name, args)
-
- # Cache the result (only cache successful results)
- if result.get("success"):
- self._set_cache(cache_key, result)
-
- # Add to history
- self._call_history.append({
- "name": name,
- "args_str": json.dumps(args, sort_keys=True, ensure_ascii=False),
- "result": result
- })
-
- results.append(self._create_tool_result(call_id, name, result))
return results
diff --git a/backend/utils/helpers.py b/backend/utils/helpers.py
index a9761b4..6691002 100644
--- a/backend/utils/helpers.py
+++ b/backend/utils/helpers.py
@@ -133,6 +133,13 @@ def build_messages(conv, project_id=None):
# Query messages directly to avoid detached instance warning
messages = Message.query.filter_by(conversation_id=conv.id).order_by(Message.created_at.asc()).all()
for m in messages:
+ # Skip tool messages — they are ephemeral intermediate results, not
+ # meant to be replayed as conversation history (would violate the API
+ # protocol that requires tool messages to follow an assistant message
+ # with matching tool_calls).
+ if m.role == "tool":
+ continue
+
# Build full content from JSON structure
full_content = m.content
try:
diff --git a/docs/Design.md b/docs/Design.md
index f2e8ca8..0647108 100644
--- a/docs/Design.md
+++ b/docs/Design.md
@@ -266,8 +266,8 @@ classDiagram
-ToolRegistry registry
-dict _cache
-list _call_history
- +process_tool_calls(calls, context) list
- +clear_history() void
+ +process_tool_calls(list, dict) list
+ +process_tool_calls_parallel(list, dict, int) list
}
ChatService --> LLMClient : 使用
@@ -295,18 +295,17 @@ classDiagram
+register(ToolDefinition) void
+get(str name) ToolDefinition?
+list_all() list~dict~
- +list_by_category(str) list~dict~
+execute(str name, dict args) dict
- +remove(str name) bool
- +has(str name) bool
}
class ToolExecutor {
-ToolRegistry registry
+ -bool enable_cache
+ -int cache_ttl
-dict _cache
-list _call_history
+process_tool_calls(list, dict) list
- +clear_history() void
+ +process_tool_calls_parallel(list, dict, int) list
}
class ToolResult {
@@ -394,18 +393,19 @@ def validate_path_in_project(path: str, project_dir: Path) -> Path:
工具执行器自动为文件工具注入 `project_id`:
```python
-# backend/tools/executor.py
+# backend/tools/executor.py — _inject_context()
-def process_tool_calls(self, tool_calls, context=None):
- for call in tool_calls:
- name = call["function"]["name"]
- args = json.loads(call["function"]["arguments"])
-
- # 自动注入 project_id
- if context and name.startswith("file_") and "project_id" in context:
- args["project_id"] = context["project_id"]
-
- result = self.registry.execute(name, args)
+@staticmethod
+def _inject_context(name: str, args: dict, context: Optional[dict]) -> None:
+ # file_* 工具: 注入 project_id
+ if name.startswith("file_") and "project_id" in context:
+ args["project_id"] = context["project_id"]
+ # agent 工具: 注入 _model 和 _project_id
+ if name == "multi_agent":
+ if "model" in context:
+ args["_model"] = context["model"]
+ if "project_id" in context:
+ args["_project_id"] = context["project_id"]
```
---
diff --git a/docs/ToolSystemDesign.md b/docs/ToolSystemDesign.md
index b197dc7..1c76288 100644
--- a/docs/ToolSystemDesign.md
+++ b/docs/ToolSystemDesign.md
@@ -27,19 +27,20 @@ classDiagram
+register(ToolDefinition tool) void
+get(str name) ToolDefinition?
+list_all() list~dict~
- +list_by_category(str category) list~dict~
+execute(str name, dict args) dict
- +remove(str name) bool
- +has(str name) bool
}
class ToolExecutor {
-ToolRegistry registry
+ -bool enable_cache
+ -int cache_ttl
-dict _cache
-list _call_history
+process_tool_calls(list tool_calls, dict context) list~dict~
- +build_request(list messages, str model, list tools, dict kwargs) dict
- +clear_history() void
+ +process_tool_calls_parallel(list tool_calls, dict context, int max_workers) list~dict~
+ -_prepare_call(dict call, dict context, set seen_calls) tuple
+ -_execute_and_record(str name, dict args, str cache_key) dict
+ -_inject_context(str name, dict args, dict context) void
}
class ToolResult {
@@ -88,32 +89,26 @@ classDiagram
### context 参数
-`process_tool_calls()` 接受 `context` 参数,用于自动注入工具参数:
+`process_tool_calls()` / `process_tool_calls_parallel()` 接受 `context` 参数,用于自动注入工具参数:
```python
-# backend/tools/executor.py
+# backend/tools/executor.py — _inject_context()
-def process_tool_calls(
- self,
- tool_calls: List[dict],
- context: Optional[dict] = None
-) -> List[dict]:
+@staticmethod
+def _inject_context(name: str, args: dict, context: Optional[dict]) -> None:
"""
- Args:
- tool_calls: LLM 返回的工具调用列表
- context: 上下文信息,支持:
- - project_id: 自动注入到文件工具
+ - file_* 工具: 注入 project_id
+ - agent 工具 (multi_agent): 注入 _model 和 _project_id
"""
- for call in tool_calls:
- name = call["function"]["name"]
- args = json.loads(call["function"]["arguments"])
-
- # 自动注入 project_id 到文件工具
- if context:
- if name.startswith("file_") and "project_id" in context:
- args["project_id"] = context["project_id"]
-
- result = self.registry.execute(name, args)
+ if not context:
+ return
+ if name.startswith("file_") and "project_id" in context:
+ args["project_id"] = context["project_id"]
+ if name == "multi_agent":
+ if "model" in context:
+ args["_model"] = context["model"]
+ if "project_id" in context:
+ args["_project_id"] = context["project_id"]
```
### 使用示例
@@ -122,12 +117,12 @@ def process_tool_calls(
# backend/services/chat.py
def stream_response(self, conv, tools_enabled=True, project_id=None):
- # 构建上下文(优先使用请求传递的 project_id,否则回退到对话绑定的)
- context = None
+ # 构建上下文(包含 model 和 project_id)
+ context = {"model": conv.model}
if project_id:
- context = {"project_id": project_id}
+ context["project_id"] = project_id
elif conv.project_id:
- context = {"project_id": conv.project_id}
+ context["project_id"] = conv.project_id
# 处理工具调用时自动注入
tool_results = self.executor.process_tool_calls(tool_calls, context)
@@ -250,6 +245,19 @@ file_read({"path": "src/main.py", "project_id": "xxx"})
|---------|------|------|
| `get_weather` | 查询天气信息(模拟) | `city`: 城市名称 |
+### 5.6 多智能体工具 (agent)
+
+| 工具名称 | 描述 | 参数 |
+|---------|------|------|
+| `multi_agent` | 派生子 Agent 并发执行任务(最多 5 个) | `tasks`: 任务数组(name, instruction, tools)<br>`_model`: 模型名称(自动注入)<br>`_project_id`: 项目 ID(自动注入) |
+
+**`multi_agent` 工作原理:**
+1. 接收任务数组,每个任务指定 name、instruction 和可选的 tools 列表
+2. 为每个子 Agent 创建独立线程,各自拥有 LLM 对话循环(最多 3 轮迭代,4096 tokens)
+3. 通过 Service Locator 获取 `llm_client` 实例
+4. 子 Agent 在 `app.app_context()` 中运行,可独立调用所有注册工具
+5. 返回 `{success, results: [{task_name, success, response/error}], total}`
+
---
## 六、核心特性
@@ -285,7 +293,6 @@ def my_tool(arguments: dict) -> dict:
- **批次内去重**:同一批次中相同工具+参数的调用会被跳过
- **历史去重**:同一会话内已调用过的工具会直接返回缓存结果
-- **自动清理**:新会话开始时调用 `clear_history()` 清理历史
### 6.4 无自动重试
@@ -308,13 +315,45 @@ def my_tool(arguments: dict) -> dict:
def init_tools() -> None:
"""初始化所有内置工具"""
from backend.tools.builtin import (
- code, crawler, data, weather, file_ops
+ code, crawler, data, weather, file_ops, agent
)
```
---
-## 八、扩展新工具
+## 八、Service Locator
+
+工具系统提供 Service Locator 模式,允许工具访问共享服务(如 LLM 客户端):
+
+```python
+# backend/tools/__init__.py
+
+_services: dict = {}
+
+def register_service(name: str, service) -> None:
+ """注册共享服务"""
+ _services[name] = service
+
+def get_service(name: str):
+ """获取已注册的服务,不存在则返回 None"""
+ return _services.get(name)
+```
+
+### 使用方式
+
+```python
+# 在应用初始化时注册(routes/__init__.py)
+from backend.tools import register_service
+register_service("llm_client", llm_client)
+
+# 在工具中使用(agent.py)
+from backend.tools import get_service
+llm_client = get_service("llm_client")
+```
+
+---
+
+## 九、扩展新工具
### 添加新工具