refactor: 重构multi agent 参数设置

This commit is contained in:
ViperEkura 2026-03-28 16:43:51 +08:00
parent 3970c0b9a0
commit cc639a979a
7 changed files with 32 additions and 57 deletions

View File

@ -39,8 +39,6 @@ max_iterations: 15
# Sub-agent settings (multi_agent tool)
sub_agent:
max_iterations: 3 # Max tool-call rounds per sub-agent
max_tokens: 4096 # Max tokens per LLM call inside a sub-agent
max_agents: 5 # Max number of concurrent sub-agents per request
max_concurrency: 3 # ThreadPoolExecutor max workers
# Available models

View File

@ -37,15 +37,11 @@ MAX_ITERATIONS = _cfg.get("max_iterations", 5)
# Max parallel workers for tool execution (ThreadPoolExecutor)
TOOL_MAX_WORKERS = _cfg.get("tool_max_workers", 4)
# Max character length for a single tool result content (truncated if exceeded)
TOOL_RESULT_MAX_LENGTH = _cfg.get("tool_result_max_length", 4096)
# Sub-agent settings (multi_agent tool)
_sa = _cfg.get("sub_agent", {})
SUB_AGENT_MAX_ITERATIONS = _sa.get("max_iterations", 3)
SUB_AGENT_MAX_TOKENS = _sa.get("max_tokens", 4096)
SUB_AGENT_MAX_AGENTS = _sa.get("max_agents", 5)
SUB_AGENT_MAX_CONCURRENCY = _sa.get("max_concurrency", 3)
SUB_AGENT_TIMEOUT = _sa.get("timeout", 60)
# Code execution settings
_ce = _cfg.get("code_execution", {})

View File

@ -14,7 +14,7 @@ from backend.utils.helpers import (
build_messages,
)
from backend.services.llm_client import LLMClient
from backend.config import MAX_ITERATIONS, TOOL_MAX_WORKERS, TOOL_RESULT_MAX_LENGTH
from backend.config import MAX_ITERATIONS, TOOL_MAX_WORKERS
logger = logging.getLogger(__name__)
@ -70,7 +70,11 @@ class ChatService:
executor = ToolExecutor(registry=registry)
context = {"model": conv_model}
context = {
"model": conv_model,
"max_tokens": conv_max_tokens,
"temperature": conv_temperature,
}
if project_id:
context["project_id"] = project_id
elif conv.project_id:
@ -332,30 +336,6 @@ class ChatService:
sse_chunks,
)
def _truncate_tool_results(self, tool_results):
"""Truncate oversized tool result content in-place and return the list."""
for tr in tool_results:
if len(tr["content"]) > TOOL_RESULT_MAX_LENGTH:
try:
result_data = json.loads(tr["content"])
original = result_data
except (json.JSONDecodeError, TypeError):
original = None
tr["content"] = json.dumps(
{"success": False, "error": "Tool result too large, truncated"},
ensure_ascii=False,
) if not original else json.dumps(
{
**original,
"truncated": True,
"_note": f"Content truncated, original length {len(tr['content'])} chars",
},
ensure_ascii=False,
default=str,
)[:TOOL_RESULT_MAX_LENGTH]
return tool_results
def _execute_tools_safe(self, app, executor, tool_calls_list, context):
"""Execute tool calls with top-level error wrapping.
@ -365,21 +345,17 @@ class ChatService:
try:
if len(tool_calls_list) > 1:
with app.app_context():
return self._truncate_tool_results(
executor.process_tool_calls_parallel(
return executor.process_tool_calls_parallel(
tool_calls_list, context, max_workers=TOOL_MAX_WORKERS
)
)
else:
with app.app_context():
return self._truncate_tool_results(
executor.process_tool_calls(
return executor.process_tool_calls(
tool_calls_list, context
)
)
except Exception as e:
logger.exception("Error during tool execution")
tool_results = [
return [
{
"role": "tool",
"tool_call_id": tc["id"],
@ -391,7 +367,6 @@ class ChatService:
}
for tc in tool_calls_list
]
return self._truncate_tool_results(tool_results)
def _save_message(
self, app, conv_id, conv_model, msg_id,

View File

@ -7,16 +7,15 @@ import json
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Any, Optional
from backend.tools import get_service
from backend.tools.factory import tool
from backend.tools.core import registry
from backend.tools.executor import ToolExecutor
from backend.config import (
DEFAULT_MODEL,
SUB_AGENT_MAX_ITERATIONS,
SUB_AGENT_MAX_TOKENS,
SUB_AGENT_MAX_AGENTS,
SUB_AGENT_MAX_CONCURRENCY,
SUB_AGENT_TIMEOUT,
)
logger = logging.getLogger(__name__)
@ -62,6 +61,7 @@ def _run_sub_agent(
tool_names: Optional[List[str]],
model: str,
max_tokens: int,
temperature: float,
project_id: Optional[str],
app: Any,
max_iterations: int = 3,
@ -71,7 +71,6 @@ def _run_sub_agent(
Each sub-agent gets its own ToolExecutor instance and runs a simplified
version of the main agent loop, limited to prevent runaway cost.
"""
from backend.tools import get_service
llm_client = get_service("llm_client")
if not llm_client:
@ -117,9 +116,9 @@ def _run_sub_agent(
# more tools.
tools=None if is_final else (tools if tools else None),
stream=False,
max_tokens=min(max_tokens, 4096),
temperature=0.7,
timeout=60,
max_tokens=max_tokens,
temperature=temperature,
timeout=SUB_AGENT_TIMEOUT,
)
if resp.status_code != 200:
@ -247,8 +246,8 @@ def multi_agent(arguments: dict) -> dict:
tasks = arguments["tasks"]
if len(tasks) > SUB_AGENT_MAX_AGENTS:
return {"success": False, "error": f"Maximum {SUB_AGENT_MAX_AGENTS} concurrent agents allowed"}
if len(tasks) > 5:
return {"success": False, "error": "Maximum 5 concurrent agents allowed"}
# Get current conversation context for model/project info
app = current_app._get_current_object()
@ -256,6 +255,8 @@ def multi_agent(arguments: dict) -> dict:
# Use injected model/project_id from executor context, fall back to defaults
model = arguments.get("_model") or DEFAULT_MODEL
project_id = arguments.get("_project_id")
max_tokens = arguments.get("_max_tokens", 65536)
temperature = arguments.get("_temperature", 0.7)
# Execute agents concurrently
concurrency = min(len(tasks), SUB_AGENT_MAX_CONCURRENCY)
@ -269,7 +270,8 @@ def multi_agent(arguments: dict) -> dict:
task["instruction"],
task.get("tools"),
model,
SUB_AGENT_MAX_TOKENS,
max_tokens,
temperature,
project_id,
app,
SUB_AGENT_MAX_ITERATIONS,

View File

@ -67,6 +67,10 @@ class ToolExecutor:
args["_model"] = context["model"]
if "project_id" in context:
args["_project_id"] = context["project_id"]
if "max_tokens" in context:
args["_max_tokens"] = context["max_tokens"]
if "temperature" in context:
args["_temperature"] = context["temperature"]
def _prepare_call(
self,

View File

@ -1022,10 +1022,9 @@ frontend_port: 4000
max_iterations: 15
# 子代理资源配置(multi_agent 工具)
# max_tokens 和 temperature 与主 Agent 共用,无需单独配置
sub_agent:
max_iterations: 3 # 每个子代理的最大工具调用轮数
max_tokens: 4096 # 每次调用的最大 token 数
max_agents: 5 # 每次请求最多派生的子代理数
max_concurrency: 3 # 并发线程数
# 可用模型列表(每个模型必须指定 api_url 和 api_key)

View File

@ -319,10 +319,11 @@ execute_python({
| 配置项 | 默认值 | 说明 |
|--------|--------|------|
| `max_iterations` | 3 | 每个子代理的最大工具调用轮数 |
| `max_tokens` | 4096 | 每次调用的最大 token 数 |
| `max_agents` | 5 | 每次请求最多派生的子代理数 |
| `max_concurrency` | 3 | ThreadPoolExecutor 并发线程数 |
> - `max_tokens` 和 `temperature` 与主 Agent 共用,从对话配置中获取,无需单独配置。
> - 子代理禁止调用 `multi_agent` 工具,防止无限递归。
---
## 六、核心特性