diff --git a/README.md b/README.md
index b625da9..01c8d3e 100644
--- a/README.md
+++ b/README.md
@@ -39,8 +39,7 @@ max_iterations: 15
 # Sub-agent settings (multi_agent tool)
 sub_agent:
   max_iterations: 3      # Max tool-call rounds per sub-agent
-  max_tokens: 4096        # Max tokens per LLM call inside a sub-agent
-  max_agents: 5           # Max number of concurrent sub-agents per request
   max_concurrency: 3      # ThreadPoolExecutor max workers
+  timeout: 60             # Timeout passed to each sub-agent LLM call
 
 # Available models
diff --git a/backend/config.py b/backend/config.py
index db5e927..fce9968 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -37,15 +37,11 @@ MAX_ITERATIONS = _cfg.get("max_iterations", 5)
 # Max parallel workers for tool execution (ThreadPoolExecutor)
 TOOL_MAX_WORKERS = _cfg.get("tool_max_workers", 4)
 
-# Max character length for a single tool result content (truncated if exceeded)
-TOOL_RESULT_MAX_LENGTH = _cfg.get("tool_result_max_length", 4096)
-
 # Sub-agent settings (multi_agent tool)
 _sa = _cfg.get("sub_agent", {})
 SUB_AGENT_MAX_ITERATIONS = _sa.get("max_iterations", 3)
-SUB_AGENT_MAX_TOKENS = _sa.get("max_tokens", 4096)
-SUB_AGENT_MAX_AGENTS = _sa.get("max_agents", 5)
 SUB_AGENT_MAX_CONCURRENCY = _sa.get("max_concurrency", 3)
+SUB_AGENT_TIMEOUT = _sa.get("timeout", 60)
 
 # Code execution settings
 _ce = _cfg.get("code_execution", {})
diff --git a/backend/services/chat.py b/backend/services/chat.py
index d7b0d2e..c8bb3c3 100644
--- a/backend/services/chat.py
+++ b/backend/services/chat.py
@@ -14,7 +14,7 @@ from backend.utils.helpers import (
     build_messages,
 )
 from backend.services.llm_client import LLMClient
-from backend.config import MAX_ITERATIONS, TOOL_MAX_WORKERS, TOOL_RESULT_MAX_LENGTH
+from backend.config import MAX_ITERATIONS, TOOL_MAX_WORKERS
 
 logger = logging.getLogger(__name__)
 
@@ -70,7 +70,11 @@ class ChatService:
 
         executor = ToolExecutor(registry=registry)
 
-        context = {"model": conv_model}
+        context = {
+            "model": conv_model,
+            "max_tokens": conv_max_tokens,
+            "temperature": conv_temperature,
+        }
         if project_id:
             context["project_id"] = project_id
         elif conv.project_id:
@@ -332,30 +336,6 @@ class ChatService:
             sse_chunks,
         )
 
-    def _truncate_tool_results(self, tool_results):
-        """Truncate oversized tool result content in-place and return the list."""
-        for tr in tool_results:
-            if len(tr["content"]) > TOOL_RESULT_MAX_LENGTH:
-                try:
-                    result_data = json.loads(tr["content"])
-                    original = result_data
-                except (json.JSONDecodeError, TypeError):
-                    original = None
-
-                tr["content"] = json.dumps(
-                    {"success": False, "error": "Tool result too large, truncated"},
-                    ensure_ascii=False,
-                ) if not original else json.dumps(
-                    {
-                        **original,
-                        "truncated": True,
-                        "_note": f"Content truncated, original length {len(tr['content'])} chars",
-                    },
-                    ensure_ascii=False,
-                    default=str,
-                )[:TOOL_RESULT_MAX_LENGTH]
-        return tool_results
-
     def _execute_tools_safe(self, app, executor, tool_calls_list, context):
         """Execute tool calls with top-level error wrapping.
 
@@ -365,21 +345,17 @@ class ChatService:
         try:
             if len(tool_calls_list) > 1:
                 with app.app_context():
-                    return self._truncate_tool_results(
-                        executor.process_tool_calls_parallel(
-                            tool_calls_list, context, max_workers=TOOL_MAX_WORKERS
-                        )
+                    return executor.process_tool_calls_parallel(
+                        tool_calls_list, context, max_workers=TOOL_MAX_WORKERS
                     )
             else:
                 with app.app_context():
-                    return self._truncate_tool_results(
-                        executor.process_tool_calls(
-                            tool_calls_list, context
-                        )
+                    return executor.process_tool_calls(
+                        tool_calls_list, context
                     )
         except Exception as e:
             logger.exception("Error during tool execution")
-            tool_results = [
+            return [
                 {
                     "role": "tool",
                     "tool_call_id": tc["id"],
@@ -391,7 +367,6 @@ class ChatService:
                 }
                 for tc in tool_calls_list
             ]
-            return self._truncate_tool_results(tool_results)
 
     def _save_message(
         self, app, conv_id, conv_model, msg_id,
diff --git a/backend/tools/builtin/agent.py b/backend/tools/builtin/agent.py
index 503a8ef..ad2d59c 100644
--- a/backend/tools/builtin/agent.py
+++ b/backend/tools/builtin/agent.py
@@ -7,16 +7,15 @@ import json
 import logging
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import List, Dict, Any, Optional
-
+from backend.tools import get_service
 from backend.tools.factory import tool
 from backend.tools.core import registry
 from backend.tools.executor import ToolExecutor
 from backend.config import (
     DEFAULT_MODEL,
     SUB_AGENT_MAX_ITERATIONS,
-    SUB_AGENT_MAX_TOKENS,
-    SUB_AGENT_MAX_AGENTS,
     SUB_AGENT_MAX_CONCURRENCY,
+    SUB_AGENT_TIMEOUT,
 )
 
 logger = logging.getLogger(__name__)
@@ -62,6 +61,7 @@ def _run_sub_agent(
     tool_names: Optional[List[str]],
     model: str,
     max_tokens: int,
+    temperature: float,
     project_id: Optional[str],
     app: Any,
     max_iterations: int = 3,
@@ -71,7 +71,6 @@ def _run_sub_agent(
     Each sub-agent gets its own ToolExecutor instance and runs a simplified
     version of the main agent loop, limited to prevent runaway cost.
     """
-    from backend.tools import get_service
 
     llm_client = get_service("llm_client")
     if not llm_client:
@@ -117,9 +116,9 @@ def _run_sub_agent(
                     # more tools.
                     tools=None if is_final else (tools if tools else None),
                     stream=False,
-                    max_tokens=min(max_tokens, 4096),
-                    temperature=0.7,
-                    timeout=60,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    timeout=SUB_AGENT_TIMEOUT,
                 )
 
             if resp.status_code != 200:
@@ -247,8 +246,8 @@ def multi_agent(arguments: dict) -> dict:
 
     tasks = arguments["tasks"]
 
-    if len(tasks) > SUB_AGENT_MAX_AGENTS:
-        return {"success": False, "error": f"Maximum {SUB_AGENT_MAX_AGENTS} concurrent agents allowed"}
+    if len(tasks) > 5:
+        return {"success": False, "error": "Maximum 5 concurrent agents allowed"}
 
     # Get current conversation context for model/project info
     app = current_app._get_current_object()
@@ -256,6 +255,8 @@ def multi_agent(arguments: dict) -> dict:
     # Use injected model/project_id from executor context, fall back to defaults
     model = arguments.get("_model") or DEFAULT_MODEL
     project_id = arguments.get("_project_id")
+    max_tokens = arguments.get("_max_tokens", 65536)
+    temperature = arguments.get("_temperature", 0.7)
 
     # Execute agents concurrently
     concurrency = min(len(tasks), SUB_AGENT_MAX_CONCURRENCY)
@@ -269,7 +270,8 @@ def multi_agent(arguments: dict) -> dict:
                 task["instruction"],
                 task.get("tools"),
                 model,
-                SUB_AGENT_MAX_TOKENS,
+                max_tokens,
+                temperature,
                 project_id,
                 app,
                 SUB_AGENT_MAX_ITERATIONS,
diff --git a/backend/tools/executor.py b/backend/tools/executor.py
index 926e156..f74ea07 100644
--- a/backend/tools/executor.py
+++ b/backend/tools/executor.py
@@ -67,6 +67,10 @@ class ToolExecutor:
                 args["_model"] = context["model"]
             if "project_id" in context:
                 args["_project_id"] = context["project_id"]
+            if "max_tokens" in context:
+                args["_max_tokens"] = context["max_tokens"]
+            if "temperature" in context:
+                args["_temperature"] = context["temperature"]
 
     def _prepare_call(
         self,
diff --git a/docs/Design.md b/docs/Design.md
index fe16f1a..a6efe1c 100644
--- a/docs/Design.md
+++ b/docs/Design.md
@@ -1022,10 +1022,9 @@ frontend_port: 4000
 max_iterations: 15
 
 # 子代理资源配置（multi_agent 工具）
+# max_tokens 和 temperature 与主 Agent 共用，无需单独配置；子代理 LLM 调用的超时可通过 sub_agent.timeout 配置（默认 60）
 sub_agent:
   max_iterations: 3      # 每个子代理的最大工具调用轮数
-  max_tokens: 4096        # 每次调用的最大 token 数
-  max_agents: 5           # 每次请求最多派生的子代理数
   max_concurrency: 3      # 并发线程数
 
 # 可用模型列表（每个模型必须指定 api_url 和 api_key）
diff --git a/docs/ToolSystemDesign.md b/docs/ToolSystemDesign.md
index fdb2477..7b25f03 100644
--- a/docs/ToolSystemDesign.md
+++ b/docs/ToolSystemDesign.md
@@ -319,10 +319,11 @@ execute_python({
 | 配置项 | 默认值 | 说明 |
 |--------|--------|------|
 | `max_iterations` | 3 | 每个子代理的最大工具调用轮数 |
-| `max_tokens` | 4096 | 每次调用的最大 token 数 |
-| `max_agents` | 5 | 每次请求最多派生的子代理数 |
 | `max_concurrency` | 3 | ThreadPoolExecutor 并发线程数 |
 
+> - `max_tokens` 和 `temperature` 与主 Agent 共用，从对话配置中获取，无需单独配置；子代理 LLM 调用的超时由 `sub_agent.timeout` 配置（默认 60）。
+> - 子代理禁止调用 `multi_agent` 工具，防止无限递归；单次请求最多派生 5 个子代理（固定上限，不可配置）。
+
 ---
 
 ## 六、核心特性