feat: 完善爬虫功能

2026-04-13 08:38:12 +08:00 · 2026-04-13 08:38:12 +08:00 · a84b8617a6
parent 805f8c86da
commit a84b8617a6
17 changed files with 422 additions and 172 deletions
--- a/.gitignore
+++ b/.gitignore
@ -9,7 +9,7 @@
 !README.md
 !.gitignore
-!luxx/**/*.py
+!*.py
 !asserts/**/*.md
 # Dashboard
--- a/asserts/ARCHITECTURE.md
+++ b/asserts/ARCHITECTURE.md
@ -5,9 +5,13 @@
 - **框架**: FastAPI 0.109+
 - **数据库**: SQLAlchemy 2.0+
 - **认证**: JWT (PyJWT)
- **HTTP客户端**: httpx
+- **HTTP客户端**: httpx, requests
 - **配置**: YAML (PyYAML)
 - **代码执行**: Python 原生执行
 - **网页爬虫**:
  - `httpx` - HTTP 客户端
  - `beautifulsoup4` - HTML 解析
  - `lxml` - XML/HTML 解析器
 ## 目录结构
@ -36,6 +40,7 @@ luxx/
 │       ├── crawler.py  # 网页爬虫
 │       ├── data.py     # 数据处理
 │       └── weather.py  # 天气查询
 │   └── services.py     # 工具服务层
 └── utils/              # 工具函数
    └── helpers.py
 ```
@ -205,7 +210,9 @@ classDiagram
 |------|------|------|
 | `python_execute` | 执行 Python 代码 | 支持 print 输出、变量访问 |
 | `python_eval` | 计算表达式 | 快速求值 |
-| `web_crawl` | 网页抓取 | BeautifulSoup + httpx |
+| `web_search` | 网页搜索 | DuckDuckGo HTML 搜索 |
 | `web_fetch` | 网页抓取 | httpx + BeautifulSoup，支持 text/links/structured |
 | `batch_fetch` | 批量抓取 | 并发获取多个页面 |
 | `get_weather` | 天气查询 | 支持城市名查询 |
 | `process_data` | 数据处理 | JSON 转换、格式化等 |
--- a/config.yaml
+++ b/config.yaml
@ -7,7 +7,7 @@ app:
 database:
  type: sqlite
-  url: sqlite:///../chat.db
+  url: sqlite:///./chat.db
 llm:
  provider: deepseek
--- a/dashboard/src/components/MessageBubble.vue
+++ b/dashboard/src/components/MessageBubble.vue
@ -66,7 +66,9 @@ const renderedContent = computed(() => {
 function formatTime(time) {
  if (!time) return ''
-  return new Date(time).toLocaleTimeString('zh-CN', { hour: '2-digit', minute: '2-digit' })
+  const date = new Date(time)
  // 使用本地时区显示
  return date.toLocaleTimeString('zh-CN', { hour: '2-digit', minute: '2-digit' })
 }
 function copyContent() {
--- a/dashboard/src/components/ProcessBlock.vue
+++ b/dashboard/src/components/ProcessBlock.vue
@ -9,10 +9,11 @@
          <span class="step-label">思考中</span>
          <span class="step-brief">{{ item.brief || '正在思考...' }}</span>
          <span v-if="streaming && item.key === lastThinkingKey" class="loading-dots">...</span>
          <span v-else-if="item.content && item.content.length > 1024" class="truncate-hint">已截断</span>
          <span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
        </div>
        <div v-if="expandedKeys.has(item.key)" class="step-content">
-          <div class="thinking-text">{{ item.content }}</div>
+          <div class="thinking-text">{{ item.displayContent }}</div>
        </div>
      </div>
@ -25,6 +26,7 @@
          <span v-if="item.loading" class="loading-dots">...</span>
          <span v-else-if="item.isSuccess === true" class="step-badge success">成功</span>
          <span v-else-if="item.isSuccess === false" class="step-badge error">失败</span>
          <span v-if="item.fullResult && item.fullResult.length > 1024" class="truncate-hint">已截断</span>
          <span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
        </div>
        <div v-if="expandedKeys.has(item.key)" class="step-content">
@ -34,7 +36,7 @@
          </div>
          <div v-if="item.resultSummary || item.fullResult" class="tool-detail" style="margin-top: 8px;">
            <span class="detail-label">结果</span>
-            <pre>{{ item.fullResult || item.resultSummary }}</pre>
+            <pre>{{ item.displayResult }}</pre>
          </div>
        </div>
      </div>
@ -71,12 +73,14 @@ const allItems = computed(() => {
  if (props.processSteps && props.processSteps.length > 0) {
    for (const step of props.processSteps) {
      if (step.type === 'thinking') {
        const content = step.content || ''
        items.push({
          key: step.id || `thinking-${step.index}`,
          type: 'thinking',
          index: step.index,
-          content: step.content || '',
+          content: content,
-          brief: step.content ? step.content.slice(0, 50) + (step.content.length > 50 ? '...' : '') : '',
+          displayContent: content.length > 1024 ? content.slice(0, 1024) + '\n\n[... 内容已截断 ...]' : content,
          brief: content.slice(0, 50) + (content.length > 50 ? '...' : ''),
        })
      } else if (step.type === 'tool_call') {
        items.push({
@ -97,12 +101,15 @@ const allItems = computed(() => {
        const toolId = step.id_ref || step.id
        const match = items.findLast(it => it.type === 'tool_call' && it.id === toolId)
        if (match) {
-          match.resultSummary = step.content ? step.content.slice(0, 200) : ''
+          const resultContent = step.content || ''
-          match.fullResult = step.content || ''
+          match.resultSummary = resultContent.slice(0, 200)
          match.fullResult = resultContent
          match.displayResult = resultContent.length > 1024 ? resultContent.slice(0, 1024) + '\n\n[... 结果已截断 ...]' : resultContent
          match.isSuccess = step.success !== false
          match.loading = false
        } else {
          // 如果没有找到对应的 tool_call，创建一个占位符
          const placeholderContent = step.content || ''
          items.push({
            key: `result-${step.id || step.index}`,
            type: 'tool_call',
@ -113,8 +120,9 @@ const allItems = computed(() => {
            brief: step.name || '工具结果',
            loading: false,
            isSuccess: true,
-            resultSummary: step.content ? step.content.slice(0, 200) : '',
+            resultSummary: placeholderContent.slice(0, 200),
-            fullResult: step.content || ''
+            fullResult: placeholderContent,
            displayResult: placeholderContent.length > 1024 ? placeholderContent.slice(0, 1024) + '\n\n[... 结果已截断 ...]' : placeholderContent
          })
        }
      } else if (step.type === 'text') {
@ -280,6 +288,15 @@ const sparkleIcon = `<svg viewBox="0 0 24 24" width="14" height="14" fill="none"
  color: var(--success-color);
 }
 .truncate-hint {
  font-size: 10px;
  padding: 2px 6px;
  background: var(--warning-bg);
  color: var(--warning-color);
  border-radius: 4px;
  margin-left: 4px;
 }
 .step-badge.error {
  background: var(--danger-bg);
  color: var(--danger-color);
--- a/dashboard/src/style.css
+++ b/dashboard/src/style.css
@ -41,6 +41,8 @@
  /* 状态颜色 */
  --success-color: #059669;
  --success-bg: rgba(16, 185, 129, 0.1);
  --warning-color: #d97706;
  --warning-bg: rgba(217, 119, 6, 0.1);
  --danger-color: #ef4444;
  --danger-bg: rgba(239, 68, 68, 0.08);
@ -112,6 +114,8 @@
  --success-color: #34d399;
  --success-bg: rgba(52, 211, 153, 0.15);
  --warning-color: #fbbf24;
  --warning-bg: rgba(251, 191, 36, 0.15);
  --danger-color: #f87171;
  --danger-bg: rgba(248, 113, 113, 0.15);
--- a/dashboard/src/utils/markdown.js
+++ b/dashboard/src/utils/markdown.js
@ -52,9 +52,9 @@ const blockMathExtension = {
 }
 marked.use({
-  extensions: [blockMathExtension, mathExtension],
+  gfm: true,
  breaks: true,
-  gfm: true
+  extensions: [blockMathExtension, mathExtension]
 })
 export function renderMarkdown(text) {
--- a/dashboard/src/views/ConversationDetailView.vue
+++ b/dashboard/src/views/ConversationDetailView.vue
@ -24,7 +24,7 @@
        </div>
      </div>
-      <div ref="messagesContainer" class="messages-container">
+      <div ref="messagesContainer" class="messages-container" @scroll="handleScroll">
        <div v-if="loading" class="load-more-top">
          <span>加载中...</span>
        </div>
@ -106,6 +106,7 @@ const sending = ref(false)
 const streamingMessage = ref(null)
 const messagesContainer = ref(null)
 const textareaRef = ref(null)
 const autoScroll = ref(true)
 const conversationId = ref(route.params.id)
 const conversationTitle = ref('')
@ -128,6 +129,7 @@ function onKeydown(e) {
 }
 const loadMessages = async () => {
  autoScroll.value = true
  loading.value = true
  try {
    const res = await messagesAPI.list(conversationId.value)
@ -191,6 +193,7 @@ const sendMessage = async () => {
    { conversation_id: conversationId.value, content },
    {
      onProcessStep: (step) => {
        autoScroll.value = true  // 流式开始时启用自动滚动
        if (!streamingMessage.value) return
        // 按 id 更新或追加步骤
        const idx = streamingMessage.value.process_steps.findIndex(s => s.id === step.id)
@ -202,6 +205,7 @@ const sendMessage = async () => {
      },
      onDone: () => {
        // 完成，添加到消息列表
        autoScroll.value = true
        if (streamingMessage.value) {
          messages.value.push({
            ...streamingMessage.value,
@ -230,6 +234,7 @@ const sendMessage = async () => {
 }
 const scrollToBottom = () => {
  if (!autoScroll.value) return
  nextTick(() => {
    if (messagesContainer.value) {
      messagesContainer.value.scrollTo({
@ -240,6 +245,15 @@ const scrollToBottom = () => {
  })
 }
 // Scroll handler: detects whether the user has scrolled away from the bottom.
 const handleScroll = () => {
  const container = messagesContainer.value
  if (!container) return
  // Remaining distance between the visible area and the end of the content.
  const gap = container.scrollHeight - container.scrollTop - container.clientHeight
  // Stop auto-follow once the view is 50px or more away from the bottom.
  autoScroll.value = gap < 50
 }
 // 监听流式消息变化，自动滚动
 watch(() => streamingMessage.value?.process_steps?.length, () => {
  if (streamingMessage.value) {
--- a/dashboard/src/views/SettingsView.vue
+++ b/dashboard/src/views/SettingsView.vue
@ -102,6 +102,11 @@
          <label>模型名称</label>
          <input v-model="form.default_model" placeholder="deepseek-chat / gpt-4" required />
        </div>
        <div class="form-group">
          <label>最大 Tokens</label>
          <input v-model.number="form.max_tokens" type="number" placeholder="8192" min="1" />
          <span class="hint">单次回复最大 token 数，默认 8192</span>
        </div>
        <div class="form-group">
          <label class="switch-card" :class="{ active: form.is_default }">
            <div class="switch-content">
@ -201,7 +206,7 @@ const testResult = ref(null)
 const formError = ref('')
 const form = ref({
-  name: '', base_url: '', api_key: '', default_model: '', is_default: false
+  name: '', base_url: '', api_key: '', default_model: '', max_tokens: 8192, is_default: false
 })
 const fetchProviders = async () => {
@ -218,7 +223,7 @@ const fetchProviders = async () => {
 const closeModal = () => {
  showModal.value = false
  editing.value = null
-  form.value = { name: '', base_url: '', api_key: '', default_model: '', is_default: false }
+  form.value = { name: '', base_url: '', api_key: '', default_model: '', max_tokens: 8192, is_default: false }
  formError.value = ''
 }
@ -232,6 +237,7 @@ const editProvider = async (p) => {
        base_url: res.data.base_url,
        api_key: res.data.api_key || '',
        default_model: res.data.default_model,
        max_tokens: res.data.max_tokens || 8192,
        is_default: res.data.is_default
      }
    }
@ -381,6 +387,7 @@ input:checked + .slider:before { transform: translateX(22px); }
 .switch-card input:checked + .slider { background-color: var(--accent); }
 .switch-card input:checked + .slider:before { transform: translateX(22px); }
 .modal-actions { display: flex; justify-content: flex-end; gap: 1rem; margin-top: 1.5rem; }
 .form-group .hint { font-size: 0.85rem; color: var(--text); margin-top: 4px; display: block; }
 .spinner { width: 48px; height: 48px; border: 4px solid var(--border); border-top-color: var(--accent); border-radius: 50%; animation: spin 1s linear infinite; margin: 0 auto 1rem; }
@keyframes spin { to { transform: rotate(360deg); } }
 </style>
--- a/luxx/models.py
+++ b/luxx/models.py
@ -7,6 +7,10 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
 from luxx.database import Base
 def local_now():
    """Return the current local wall-clock time as a naive ``datetime``.

    No timezone is attached to the returned value (``tzinfo`` is ``None``).
    """
    current = datetime.now()
    return current
 class LLMProvider(Base):
    """LLM Provider configuration model"""
    __tablename__ = "llm_providers"
@ -18,10 +22,11 @@ class LLMProvider(Base):
    base_url: Mapped[str] = mapped_column(String(500), nullable=False)
    api_key: Mapped[str] = mapped_column(String(500), nullable=False)
    default_model: Mapped[str] = mapped_column(String(100), nullable=False, default="gpt-4")
    max_tokens: Mapped[int] = mapped_column(Integer, default=8192)  # 默认 8192
    is_default: Mapped[bool] = mapped_column(Boolean, default=False)
    enabled: Mapped[bool] = mapped_column(Boolean, default=True)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime, default=local_now, onupdate=local_now)
    # Relationships
    user: Mapped["User"] = relationship("User", backref="llm_providers")
@ -35,6 +40,7 @@ class LLMProvider(Base):
            "provider_type": self.provider_type,
            "base_url": self.base_url,
            "default_model": self.default_model,
            "max_tokens": self.max_tokens,
            "is_default": self.is_default,
            "enabled": self.enabled,
            "created_at": self.created_at.isoformat() if self.created_at else None,
@ -53,8 +59,8 @@ class Project(Base):
    user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), nullable=False)
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime, default=local_now, onupdate=local_now)
    # Relationships
    user: Mapped["User"] = relationship("User", backref="projects")
@ -70,7 +76,7 @@ class User(Base):
    password_hash: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    role: Mapped[str] = mapped_column(String(20), default="user")
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
    # Relationships
    conversations: Mapped[List["Conversation"]] = relationship(
@ -102,8 +108,8 @@ class Conversation(Base):
    temperature: Mapped[float] = mapped_column(Float, default=0.7)
    max_tokens: Mapped[int] = mapped_column(Integer, default=2000)
    thinking_enabled: Mapped[bool] = mapped_column(Boolean, default=False)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
-    updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime, default=local_now, onupdate=local_now)
    # Relationships
    user: Mapped["User"] = relationship("User", back_populates="conversations")
@ -161,7 +167,7 @@ class Message(Base):
    role: Mapped[str] = mapped_column(String(16), nullable=False)  # user, assistant, system, tool
    content: Mapped[str] = mapped_column(Text, nullable=False, default="")
    token_count: Mapped[int] = mapped_column(Integer, default=0)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
+    created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
    # Relationships
    conversation: Mapped["Conversation"] = relationship("Conversation", back_populates="messages")
--- a/luxx/routes/messages.py
+++ b/luxx/routes/messages.py
@ -79,8 +79,8 @@ def send_message(
    )
    db.add(user_message)
-    from datetime import datetime
+    from datetime import datetime, timezone, timedelta
-    conversation.updated_at = datetime.utcnow()
+    conversation.updated_at = datetime.now(timezone(timedelta(hours=8)))
    response = chat_service.non_stream_response(
        conversation=conversation,
@ -133,7 +133,7 @@ async def stream_message(
        token_count=len(data.content) // 4
    )
    db.add(user_message)
-    conversation.updated_at = datetime.utcnow()
+    conversation.updated_at = datetime.now()
    db.commit()
    async def event_generator():
--- a/luxx/services/chat.py
+++ b/luxx/services/chat.py
@ -20,7 +20,8 @@ def _sse_event(event: str, data: dict) -> str:
 def get_llm_client(conversation: Conversation = None):
-    """Get LLM client, optionally using conversation's provider"""
+    """Get LLM client, optionally using conversation's provider. Returns (client, max_tokens)"""
    max_tokens = None
    if conversation and conversation.provider_id:
        from luxx.models import LLMProvider
        from luxx.database import SessionLocal
@ -28,18 +29,19 @@ def get_llm_client(conversation: Conversation = None):
        try:
            provider = db.query(LLMProvider).filter(LLMProvider.id == conversation.provider_id).first()
            if provider:
                max_tokens = provider.max_tokens
                client = LLMClient(
                    api_key=provider.api_key,
                    api_url=provider.base_url,
                    model=provider.default_model
                )
-                return client
+                return client, max_tokens
        finally:
            db.close()
    # Fallback to global config
    client = LLMClient()
-    return client
+    return client, max_tokens
 class ChatService:
@ -112,8 +114,10 @@ class ChatService:
            tools = registry.list_all() if tools_enabled else None
-            llm = get_llm_client(conversation)
+            llm, provider_max_tokens = get_llm_client(conversation)
            model = conversation.model or llm.default_model or "gpt-4"
            # 使用 provider 的 max_tokens，如果 conversation 有自己的 max_tokens 则覆盖
            max_tokens = conversation.max_tokens if hasattr(conversation, 'max_tokens') and conversation.max_tokens else provider_max_tokens
            # State tracking
            all_steps = []
@ -146,7 +150,7 @@ class ChatService:
                    messages=messages,
                    tools=tools,
                    temperature=conversation.temperature,
-                    max_tokens=conversation.max_tokens
+                    max_tokens=max_tokens or 8192
                ):
                    # Parse SSE line
                    # Format: "event: xxx\ndata: {...}\n\n"
--- a/luxx/tools/builtin/crawler.py
+++ b/luxx/tools/builtin/crawler.py
@ -1,14 +1,11 @@
-"""Web crawler tools"""
+"""Crawler related tools"""
 import requests
 from typing import Dict, Any, List, Optional
 from bs4 import BeautifulSoup
 from luxx.tools.factory import tool
 from luxx.tools.services import SearchService, FetchService
@tool(
    name="web_search",
-    description="Search the internet for information using web search",
+    description="Search the internet for information. Use when you need to find latest news or answer questions.",
    parameters={
        "type": "object",
        "properties": {
@ -18,7 +15,7 @@ from luxx.tools.factory import tool
            },
            "max_results": {
                "type": "integer",
-                "description": "Maximum number of results to return",
+                "description": "Number of results to return, default 5",
                "default": 5
            }
        },
@ -26,164 +23,106 @@ from luxx.tools.factory import tool
    },
    category="crawler"
 )
-def web_search(arguments: Dict[str, Any]) -> Dict[str, Any]:
+def web_search(arguments: dict) -> dict:
    """
-    Execute web search
+    Web search tool using DuckDuckGo
    Note: Results are retrieved by SearchService from DuckDuckGo's HTML search page;
    other providers (Google Custom Search, SerpAPI, etc.) are not integrated.
    """
-    query = arguments.get("query", "")
+    query = arguments["query"]
    max_results = arguments.get("max_results", 5)
-    if not query:
+    service = SearchService()
-        return {"success": False, "error": "Query is required"}
+    results = service.search(query, max_results)
-    # Simulated search results
+    if not results:
-    # Real implementation should integrate with actual search API
+        return {"success": True, "data": {"query": query, "results": []}, "message": "No results found"}
-    return {
+
-        "success": True,
+    return {"success": True, "data": {"query": query, "results": results}}
        "data": {
            "query": query,
            "results": [
                {
                    "title": f"Result for '{query}' - Example {i+1}",
                    "url": f"https://example.com/result_{i+1}",
                    "snippet": f"This is a sample search result for the query '{query}'. " * 3
                }
                for i in range(min(max_results, 5))
            ]
        }
    }
@tool(
    name="web_fetch",
-    description="Fetch and parse content from a web page",
+    description="Fetch content from a webpage. Use when user needs detailed information from a page.",
    parameters={
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
-                "description": "URL of the web page to fetch"
+                "description": "URL of the webpage to fetch"
            },
-            "extract_text": {
+            "extract_type": {
-                "type": "boolean",
+                "type": "string",
-                "description": "Whether to extract text content only",
+                "description": "Extraction type: text, links, or structured",
-                "default": True
+                "enum": ["text", "links", "structured"],
                "default": "text"
            }
        },
        "required": ["url"]
    },
    category="crawler"
 )
-def web_fetch(arguments: Dict[str, Any]) -> Dict[str, Any]:
+def web_fetch(arguments: dict) -> dict:
-    """Fetch and parse web page content"""
+    """
-    url = arguments.get("url", "")
+    Page fetch tool
-    extract_text = arguments.get("extract_text", True)
+    """
    url = arguments["url"]
    extract_type = arguments.get("extract_type", "text")
    if not url:
        return {"success": False, "error": "URL is required"}
-    # Simple URL validation
+    service = FetchService(timeout=15)
-    if not url.startswith(("http://", "https://")):
+    result = service.fetch(url, extract_type)
        url = "https://" + url
-    try:
+    if "error" in result:
-        headers = {
+        return {"success": False, "error": result["error"]}
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
-        if extract_text:
+    return {"success": True, "data": result}
            soup = BeautifulSoup(response.text, "html.parser")
            # Remove script and style tags
            for tag in soup(["script", "style"]):
                tag.decompose()
            text = soup.get_text(separator="\n", strip=True)
            # Clean up extra blank lines
            lines = [line.strip() for line in text.split("\n") if line.strip()]
            text = "\n".join(lines)
            return {
                "success": True,
                "data": {
                    "url": url,
                    "title": soup.title.string if soup.title else "",
                    "content": text[:10000]  # Limit content length
                }
            }
        else:
            return {
                "success": True,
                "data": {
                    "url": url,
                    "html": response.text[:50000]  # Limit HTML length
                }
            }
    except requests.RequestException as e:
        return {"success": False, "error": f"Failed to fetch URL: {str(e)}"}
@tool(
-    name="extract_links",
+    name="batch_fetch",
-    description="Extract all links from a web page",
+    description="Batch fetch multiple webpages. Use when you need to get content from multiple pages.",
    parameters={
        "type": "object",
        "properties": {
-            "url": {
+            "urls": {
-                "type": "string",
+                "type": "array",
-                "description": "URL of the web page"
+                "items": {"type": "string"},
                "description": "List of URLs to fetch"
            },
-            "max_links": {
+            "extract_type": {
-                "type": "integer",
+                "type": "string",
-                "description": "Maximum number of links to extract",
+                "enum": ["text", "links", "structured"],
-                "default": 20
+                "default": "text"
            }
        },
-        "required": ["url"]
+        "required": ["urls"]
    },
    category="crawler"
 )
-def extract_links(arguments: Dict[str, Any]) -> Dict[str, Any]:
+def batch_fetch(arguments: dict) -> dict:
-    """Extract all links from a web page"""
+    """
-    url = arguments.get("url", "")
+    Batch fetch tool
-    max_links = arguments.get("max_links", 20)
+    """
    urls = arguments["urls"]
    extract_type = arguments.get("extract_type", "text")
-    if not url:
+    if not urls:
-        return {"success": False, "error": "URL is required"}
+        return {"success": False, "error": "URLs list is required"}
-    if not url.startswith(("http://", "https://")):
+    if len(urls) > 10:
-        url = "https://" + url
+        return {"success": False, "error": "Maximum 10 pages allowed"}
-    try:
+    service = FetchService(timeout=10)
-        headers = {
+    results = service.fetch_batch(urls, extract_type)
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+
    successful = sum(1 for r in results if "error" not in r)
    return {
        "success": True,
        "data": {
            "results": results,
            "total": len(results),
            "successful": successful
        }
-        response = requests.get(url, headers=headers, timeout=10)
+    }
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        links = []
        for a_tag in soup.find_all("a", href=True)[:max_links]:
            href = a_tag["href"]
            # Handle relative URLs
            if href.startswith("/"):
                from urllib.parse import urljoin
                href = urljoin(url, href)
            links.append({
                "text": a_tag.get_text(strip=True) or href,
                "url": href
            })
        return {
            "success": True,
            "data": {
                "url": url,
                "links": links
            }
        }
    except requests.RequestException as e:
        return {"success": False, "error": f"Failed to fetch URL: {str(e)}"}
--- a/luxx/tools/executor.py
+++ b/luxx/tools/executor.py
@ -156,7 +156,7 @@ class ToolExecutor:
            "tool_call_id": call_id,
            "role": "tool",
            "name": name,
-            "content": json.dumps(result)
+            "content": json.dumps(result, ensure_ascii=False)
        }
    def _create_error_result(self, call_id: str, name: str, error: str) -> Dict[str, Any]:
@ -165,7 +165,7 @@ class ToolExecutor:
            "tool_call_id": call_id,
            "role": "tool",
            "name": name,
-            "content": json.dumps({"success": False, "error": error})
+            "content": json.dumps({"success": False, "error": error}, ensure_ascii=False)
        }
    def clear_cache(self) -> None:
--- a/luxx/tools/services.py
+++ b/luxx/tools/services.py
@ -0,0 +1,247 @@
 """Tool helper services"""
 import re
 import httpx
 from urllib.parse import parse_qs, urlparse
 from typing import List
 from concurrent.futures import ThreadPoolExecutor, as_completed
 class SearchService:
    """Search service that scrapes DuckDuckGo's HTML results page.

    Only the ``"duckduckgo"`` engine is implemented; any other engine name
    makes :meth:`search` raise ``ValueError``.
    """

    def __init__(self, engine: str = "duckduckgo"):
        self.engine = engine

    def search(
        self,
        query: str,
        max_results: int = 5,
        region: str = "cn-zh"
    ) -> List[dict]:
        """
        Execute search

        Args:
            query: Search keywords
            max_results: Max result count
            region: Region setting, forwarded to the engine

        Returns:
            Search result list; each item has "title", "url" and "snippet"

        Raises:
            ValueError: If the configured engine is not supported
        """
        if self.engine == "duckduckgo":
            return self._search_duckduckgo(query, max_results, region)
        else:
            raise ValueError(f"Unsupported search engine: {self.engine}")

    @staticmethod
    def _clean_result_url(raw_url: str) -> str:
        """Return the real target of a DuckDuckGo result link.

        Result links may be redirect URLs that carry the target in the
        ``uddg`` query parameter; ``parse_qs`` already percent-decodes it.
        Scheme-relative links (``//host/path``) are given an ``https:`` prefix
        so the returned URL can be fetched directly.
        """
        if "uddg=" in raw_url:
            targets = parse_qs(urlparse(raw_url).query).get("uddg")
            if targets:
                return targets[0]
        if raw_url.startswith("//"):
            return "https:" + raw_url
        return raw_url

    def _search_duckduckgo(
        self,
        query: str,
        max_results: int,
        region: str
    ) -> List[dict]:
        """DuckDuckGo search via HTML

        Returns an empty list for a blank query, a non-positive
        ``max_results``, or when the request fails.
        """
        if not query or not query.strip():
            return []
        if max_results is not None and max_results <= 0:
            return []

        from urllib.parse import urlencode
        from bs4 import BeautifulSoup

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept": "text/html,application/xhtml+xml",
        }
        params = {"q": query}
        if region:
            # "kl" is DuckDuckGo's region/language parameter.
            params["kl"] = region
        url = f"https://html.duckduckgo.com/html/?{urlencode(params)}"

        try:
            resp = httpx.get(url, headers=headers, timeout=15, follow_redirects=True)
            resp.raise_for_status()
        except Exception:
            # Deliberate best-effort: a failed search is reported as "no results".
            return []

        soup = BeautifulSoup(resp.text, "html.parser")
        results = []
        for result in soup.select(".result"):
            title_elem = result.select_one(".result__title a")
            if not title_elem:
                # Entries without a title link do not count towards the limit.
                continue
            snippet_elem = result.select_one(".result__snippet")
            results.append({
                "title": title_elem.get_text(strip=True),
                "url": self._clean_result_url(title_elem.get("href", "")),
                "snippet": snippet_elem.get_text(strip=True) if snippet_elem else ""
            })
            if max_results is not None and len(results) >= max_results:
                break
        return results
 class FetchService:
    """Page fetch service

    Downloads pages with httpx and delegates parsing to ContentExtractor.
    Failures are reported as dicts containing an "error" key rather than
    raised, so callers can test ``"error" in result``.
    """

    def __init__(self, timeout: float = 15.0):
        self.timeout = timeout
        self.user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )

    @staticmethod
    def _absolutize_links(links: List[dict], base_url: str) -> List[dict]:
        """Return copies of ``links`` with each "url" resolved against ``base_url``.

        Absolute URLs are left unchanged by ``urljoin``.
        """
        from urllib.parse import urljoin
        return [{**link, "url": urljoin(base_url, link["url"])} for link in links]

    def _fetch_safely(self, url: str, extract_type: str) -> dict:
        """Call :meth:`fetch` and convert any exception into an error dict."""
        try:
            return self.fetch(url, extract_type)
        except Exception as e:
            # Keep the URL so a failed entry in a batch can be identified.
            return {"error": str(e), "url": url}

    def fetch(
        self,
        url: str,
        extract_type: str = "text"
    ) -> dict:
        """
        Fetch a single page

        Args:
            url: Page URL; "https://" is prepended when no http(s) scheme is given
            extract_type: Extract type (text, links, structured); any other
                value is treated as structured

        Returns:
            Fetch result, or {"error": ..., "url": ...} when the request fails
        """
        url = (url or "").strip()
        if not url:
            return {"error": "URL is required", "url": url}
        if not url.lower().startswith(("http://", "https://")):
            url = "https://" + url

        try:
            resp = httpx.get(
                url,
                timeout=self.timeout,
                follow_redirects=True,
                headers={"User-Agent": self.user_agent}
            )
            resp.raise_for_status()
        except httpx.TimeoutException:
            return {"error": "Request timeout", "url": url}
        except Exception as e:
            return {"error": str(e), "url": url}

        # Relative hrefs are resolved against the final URL after redirects.
        base_url = str(resp.url)
        extractor = ContentExtractor(resp.text)

        if extract_type == "text":
            return {
                "url": url,
                "title": extractor.extract_title(),
                "text": extractor.extract_text()[:15000]
            }
        elif extract_type == "links":
            return {
                "url": url,
                "links": self._absolutize_links(extractor.extract_links(), base_url)
            }
        else:
            result = extractor.extract_structured(url)
            result["links"] = self._absolutize_links(result.get("links", []), base_url)
            return result

    def fetch_batch(
        self,
        urls: List[str],
        extract_type: str = "text",
        max_concurrent: int = 5
    ) -> List[dict]:
        """
        Batch fetch pages concurrently.

        Args:
            urls: URL list
            extract_type: Extract type
            max_concurrent: Max concurrent requests (clamped to 1-5, default 5)

        Returns:
            Result list (same order as input URLs); a failed page yields
            {"error": ..., "url": ...} at its position
        """
        if not urls:
            return []
        if len(urls) == 1:
            # No thread pool needed for a single page.
            return [self._fetch_safely(urls[0], extract_type)]

        max_concurrent = min(max(max_concurrent, 1), 5)
        results = [None] * len(urls)

        with ThreadPoolExecutor(max_workers=max_concurrent) as pool:
            futures = {
                pool.submit(self._fetch_safely, url, extract_type): i
                for i, url in enumerate(urls)
            }
            # Futures finish in arbitrary order; the stored index restores input order.
            for future in as_completed(futures):
                results[futures[future]] = future.result()

        return results
 class ContentExtractor:
    """Content extractor using BeautifulSoup

    The parsed tree is built lazily on first access to :attr:`soup` and is
    shared by the title, link and structured extractors, so none of the
    extractors may modify it.
    """

    # Tags whose content is dropped when extracting plain text.
    _NOISE_TAGS = ["script", "style", "nav", "footer", "header", "aside"]

    def __init__(self, html: str):
        self.html = html
        self._soup = None

    @property
    def soup(self):
        """Parsed document, created on first use and cached."""
        if self._soup is None:
            from bs4 import BeautifulSoup
            self._soup = BeautifulSoup(self.html, "html.parser")
        return self._soup

    def extract_title(self) -> str:
        """Extract page title (whitespace-stripped, "" when absent)"""
        title_tag = self.soup.title
        if title_tag is None:
            return ""
        return (title_tag.string or "").strip()

    def extract_text(self) -> str:
        """Extract plain text with script/style/navigation blocks removed"""
        from bs4 import BeautifulSoup
        # decompose() is destructive, so prune a private parse instead of the
        # shared tree; otherwise links inside nav/header/footer/aside would be
        # missing from a later extract_links() call.
        scratch = BeautifulSoup(self.html, "html.parser")
        for tag in scratch(self._NOISE_TAGS):
            tag.decompose()

        text = scratch.get_text(separator="\n", strip=True)
        # Clean extra whitespace
        text = re.sub(r"\n{3,}", "\n\n", text)
        return text

    def extract_links(self, max_count: int = 50) -> List[dict]:
        """Extract up to ``max_count`` links as {"text", "url"} dicts.

        Anchors without visible text and in-page, javascript:, mailto: and
        tel: targets are skipped.
        """
        links = []
        if max_count <= 0:
            return links

        for a in self.soup.find_all("a", href=True):
            text = a.get_text(strip=True)
            href = a["href"]
            if text and href and not href.startswith(("#", "javascript:", "mailto:", "tel:")):
                links.append({"text": text, "url": href})
                if len(links) >= max_count:
                    break
        return links

    def extract_structured(self, url: str = "") -> dict:
        """Extract structured content

        Returns a dict with "url", "title", "description" (from the
        ``<meta name="description">`` tag), "text" (first 5000 characters)
        and "links" (at most 20).
        """
        # Extract meta description
        description = ""
        meta_desc = self.soup.find("meta", attrs={"name": "description"})
        if meta_desc:
            description = meta_desc.get("content", "") or ""

        return {
            "url": url,
            "title": self.extract_title(),
            "description": description.strip(),
            "text": self.extract_text()[:5000],
            "links": self.extract_links(20)
        }
--- a/pyproject.toml
+++ b/pyproject.toml
@ -2,7 +2,6 @@
 name = "luxx"
 version = "1.0.0"
 description = "luxx - FastAPI + SQLAlchemy"
 readme = "docs/README.md"
 requires-python = ">=3.10"
 dependencies = [
@ -19,6 +18,7 @@ dependencies = [
    "requests>=2.31.0",
    "beautifulsoup4>=4.12.3",
    "lxml>=5.1.0",
    "httpx>=0.26.0",
    "pyyaml>=6.0.1",
    "shortuuid>=1.0.11",
    "pydantic>=2.5.0",
@ -34,3 +34,6 @@ dev = [
    "black>=24.0.0",
    "ruff>=0.1.0",
 ]
 [tool.setuptools]
 packages = ["luxx"]
--- a/luxx/run.py
+++ b/luxx/run.py