refactor: 重构multi agent 参数设置

This commit is contained in:
ViperEkura 2026-03-28 16:43:51 +08:00
parent 3970c0b9a0
commit cc639a979a
7 changed files with 32 additions and 57 deletions

View File

@ -39,8 +39,6 @@ max_iterations: 15
# Sub-agent settings (multi_agent tool)
sub_agent:
max_iterations: 3 # Max tool-call rounds per sub-agent
max_tokens: 4096 # Max tokens per LLM call inside a sub-agent
max_agents: 5 # Max number of concurrent sub-agents per request
max_concurrency: 3 # ThreadPoolExecutor max workers
# Available models

View File

@ -37,15 +37,11 @@ MAX_ITERATIONS = _cfg.get("max_iterations", 5)
# Max parallel workers for tool execution (ThreadPoolExecutor)
TOOL_MAX_WORKERS = _cfg.get("tool_max_workers", 4)
# Max character length for a single tool result content (truncated if exceeded)
TOOL_RESULT_MAX_LENGTH = _cfg.get("tool_result_max_length", 4096)
# Sub-agent settings (multi_agent tool)
_sa = _cfg.get("sub_agent", {})
SUB_AGENT_MAX_ITERATIONS = _sa.get("max_iterations", 3)
SUB_AGENT_MAX_TOKENS = _sa.get("max_tokens", 4096)
SUB_AGENT_MAX_AGENTS = _sa.get("max_agents", 5)
SUB_AGENT_MAX_CONCURRENCY = _sa.get("max_concurrency", 3)
SUB_AGENT_TIMEOUT = _sa.get("timeout", 60)
# Code execution settings
_ce = _cfg.get("code_execution", {})

View File

@ -14,7 +14,7 @@ from backend.utils.helpers import (
build_messages,
)
from backend.services.llm_client import LLMClient
from backend.config import MAX_ITERATIONS, TOOL_MAX_WORKERS, TOOL_RESULT_MAX_LENGTH
from backend.config import MAX_ITERATIONS, TOOL_MAX_WORKERS
logger = logging.getLogger(__name__)
@ -70,7 +70,11 @@ class ChatService:
executor = ToolExecutor(registry=registry)
context = {"model": conv_model}
context = {
"model": conv_model,
"max_tokens": conv_max_tokens,
"temperature": conv_temperature,
}
if project_id:
context["project_id"] = project_id
elif conv.project_id:
@ -332,30 +336,6 @@ class ChatService:
sse_chunks,
)
def _truncate_tool_results(self, tool_results):
"""Truncate oversized tool result content in-place and return the list."""
for tr in tool_results:
if len(tr["content"]) > TOOL_RESULT_MAX_LENGTH:
try:
result_data = json.loads(tr["content"])
original = result_data
except (json.JSONDecodeError, TypeError):
original = None
tr["content"] = json.dumps(
{"success": False, "error": "Tool result too large, truncated"},
ensure_ascii=False,
) if not original else json.dumps(
{
**original,
"truncated": True,
"_note": f"Content truncated, original length {len(tr['content'])} chars",
},
ensure_ascii=False,
default=str,
)[:TOOL_RESULT_MAX_LENGTH]
return tool_results
def _execute_tools_safe(self, app, executor, tool_calls_list, context):
"""Execute tool calls with top-level error wrapping.
@ -365,21 +345,17 @@ class ChatService:
try:
if len(tool_calls_list) > 1:
with app.app_context():
return self._truncate_tool_results(
executor.process_tool_calls_parallel(
return executor.process_tool_calls_parallel(
tool_calls_list, context, max_workers=TOOL_MAX_WORKERS
)
)
else:
with app.app_context():
return self._truncate_tool_results(
executor.process_tool_calls(
return executor.process_tool_calls(
tool_calls_list, context
)
)
except Exception as e:
logger.exception("Error during tool execution")
tool_results = [
return [
{
"role": "tool",
"tool_call_id": tc["id"],
@ -391,7 +367,6 @@ class ChatService:
}
for tc in tool_calls_list
]
return self._truncate_tool_results(tool_results)
def _save_message(
self, app, conv_id, conv_model, msg_id,

View File

@ -7,16 +7,15 @@ import json
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Any, Optional
from backend.tools import get_service
from backend.tools.factory import tool
from backend.tools.core import registry
from backend.tools.executor import ToolExecutor
from backend.config import (
DEFAULT_MODEL,
SUB_AGENT_MAX_ITERATIONS,
SUB_AGENT_MAX_TOKENS,
SUB_AGENT_MAX_AGENTS,
SUB_AGENT_MAX_CONCURRENCY,
SUB_AGENT_TIMEOUT,
)
logger = logging.getLogger(__name__)
@ -62,6 +61,7 @@ def _run_sub_agent(
tool_names: Optional[List[str]],
model: str,
max_tokens: int,
temperature: float,
project_id: Optional[str],
app: Any,
max_iterations: int = 3,
@ -71,7 +71,6 @@ def _run_sub_agent(
Each sub-agent gets its own ToolExecutor instance and runs a simplified
version of the main agent loop, limited to prevent runaway cost.
"""
from backend.tools import get_service
llm_client = get_service("llm_client")
if not llm_client:
@ -117,9 +116,9 @@ def _run_sub_agent(
# more tools.
tools=None if is_final else (tools if tools else None),
stream=False,
max_tokens=min(max_tokens, 4096),
temperature=0.7,
timeout=60,
max_tokens=max_tokens,
temperature=temperature,
timeout=SUB_AGENT_TIMEOUT,
)
if resp.status_code != 200:
@ -247,8 +246,8 @@ def multi_agent(arguments: dict) -> dict:
tasks = arguments["tasks"]
if len(tasks) > SUB_AGENT_MAX_AGENTS:
return {"success": False, "error": f"Maximum {SUB_AGENT_MAX_AGENTS} concurrent agents allowed"}
if len(tasks) > 5:
return {"success": False, "error": "Maximum 5 concurrent agents allowed"}
# Get current conversation context for model/project info
app = current_app._get_current_object()
@ -256,6 +255,8 @@ def multi_agent(arguments: dict) -> dict:
# Use injected model/project_id from executor context, fall back to defaults
model = arguments.get("_model") or DEFAULT_MODEL
project_id = arguments.get("_project_id")
max_tokens = arguments.get("_max_tokens", 65536)
temperature = arguments.get("_temperature", 0.7)
# Execute agents concurrently
concurrency = min(len(tasks), SUB_AGENT_MAX_CONCURRENCY)
@ -269,7 +270,8 @@ def multi_agent(arguments: dict) -> dict:
task["instruction"],
task.get("tools"),
model,
SUB_AGENT_MAX_TOKENS,
max_tokens,
temperature,
project_id,
app,
SUB_AGENT_MAX_ITERATIONS,

View File

@ -67,6 +67,10 @@ class ToolExecutor:
args["_model"] = context["model"]
if "project_id" in context:
args["_project_id"] = context["project_id"]
if "max_tokens" in context:
args["_max_tokens"] = context["max_tokens"]
if "temperature" in context:
args["_temperature"] = context["temperature"]
def _prepare_call(
self,

View File

@ -1022,10 +1022,9 @@ frontend_port: 4000
max_iterations: 15
# 子代理资源配置(multi_agent 工具)
# max_tokens 和 temperature 与主 Agent 共用,无需单独配置
sub_agent:
max_iterations: 3 # 每个子代理的最大工具调用轮数
max_tokens: 4096 # 每次调用的最大 token 数
max_agents: 5 # 每次请求最多派生的子代理数
max_concurrency: 3 # 并发线程数
# 可用模型列表(每个模型必须指定 api_url 和 api_key)

View File

@ -319,10 +319,11 @@ execute_python({
| 配置项 | 默认值 | 说明 |
|--------|--------|------|
| `max_iterations` | 3 | 每个子代理的最大工具调用轮数 |
| `max_tokens` | 4096 | 每次调用的最大 token 数 |
| `max_agents` | 5 | 每次请求最多派生的子代理数 |
| `max_concurrency` | 3 | ThreadPoolExecutor 并发线程数 |
> - `max_tokens` 和 `temperature` 与主 Agent 共用,从对话配置中获取,无需单独配置。
> - 子代理禁止调用 `multi_agent` 工具,防止无限递归。
---
## 六、核心特性