feat: 完善爬虫功能

This commit is contained in:
ViperEkura 2026-04-13 08:38:12 +08:00
parent 805f8c86da
commit a84b8617a6
17 changed files with 422 additions and 172 deletions

2
.gitignore vendored
View File

@ -9,7 +9,7 @@
!README.md
!.gitignore
!luxx/**/*.py
!*.py
!asserts/**/*.md
# Dashboard

View File

@ -5,9 +5,13 @@
- **框架**: FastAPI 0.109+
- **数据库**: SQLAlchemy 2.0+
- **认证**: JWT (PyJWT)
- **HTTP客户端**: httpx
- **HTTP客户端**: httpx, requests
- **配置**: YAML (PyYAML)
- **代码执行**: Python 原生执行
- **网页爬虫**:
- `httpx` - HTTP 客户端
- `beautifulsoup4` - HTML 解析
- `lxml` - XML/HTML 解析器
## 目录结构
@ -36,6 +40,7 @@ luxx/
│ ├── crawler.py # 网页爬虫
│ ├── data.py # 数据处理
│ └── weather.py # 天气查询
│ └── services.py # 工具服务层
└── utils/ # 工具函数
└── helpers.py
```
@ -205,7 +210,9 @@ classDiagram
|------|------|------|
| `python_execute` | 执行 Python 代码 | 支持 print 输出、变量访问 |
| `python_eval` | 计算表达式 | 快速求值 |
| `web_crawl` | 网页抓取 | BeautifulSoup + httpx |
| `web_search` | DuckDuckGo HTML | DuckDuckGo HTML 搜索 |
| `web_fetch` | 网页抓取 | httpx + BeautifulSoup，支持 text/links/structured |
| `batch_fetch` | 批量抓取 | 并发获取多个页面 |
| `get_weather` | 天气查询 | 支持城市名查询 |
| `process_data` | 数据处理 | JSON 转换、格式化等 |

View File

@ -7,7 +7,7 @@ app:
database:
type: sqlite
url: sqlite:///../chat.db
url: sqlite:///./chat.db
llm:
provider: deepseek

View File

@ -66,7 +66,9 @@ const renderedContent = computed(() => {
function formatTime(time) {
if (!time) return ''
return new Date(time).toLocaleTimeString('zh-CN', { hour: '2-digit', minute: '2-digit' })
const date = new Date(time)
// 使
return date.toLocaleTimeString('zh-CN', { hour: '2-digit', minute: '2-digit' })
}
function copyContent() {

View File

@ -9,10 +9,11 @@
<span class="step-label">思考中</span>
<span class="step-brief">{{ item.brief || '正在思考...' }}</span>
<span v-if="streaming && item.key === lastThinkingKey" class="loading-dots">...</span>
<span v-else-if="item.content && item.content.length > 1024" class="truncate-hint">已截断</span>
<span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
</div>
<div v-if="expandedKeys.has(item.key)" class="step-content">
<div class="thinking-text">{{ item.content }}</div>
<div class="thinking-text">{{ item.displayContent }}</div>
</div>
</div>
@ -25,6 +26,7 @@
<span v-if="item.loading" class="loading-dots">...</span>
<span v-else-if="item.isSuccess === true" class="step-badge success">成功</span>
<span v-else-if="item.isSuccess === false" class="step-badge error">失败</span>
<span v-if="item.fullResult && item.fullResult.length > 1024" class="truncate-hint">已截断</span>
<span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
</div>
<div v-if="expandedKeys.has(item.key)" class="step-content">
@ -34,7 +36,7 @@
</div>
<div v-if="item.resultSummary || item.fullResult" class="tool-detail" style="margin-top: 8px;">
<span class="detail-label">结果</span>
<pre>{{ item.fullResult || item.resultSummary }}</pre>
<pre>{{ item.displayResult }}</pre>
</div>
</div>
</div>
@ -71,12 +73,14 @@ const allItems = computed(() => {
if (props.processSteps && props.processSteps.length > 0) {
for (const step of props.processSteps) {
if (step.type === 'thinking') {
const content = step.content || ''
items.push({
key: step.id || `thinking-${step.index}`,
type: 'thinking',
index: step.index,
content: step.content || '',
brief: step.content ? step.content.slice(0, 50) + (step.content.length > 50 ? '...' : '') : '',
content: content,
displayContent: content.length > 1024 ? content.slice(0, 1024) + '\n\n[... 内容已截断 ...]' : content,
brief: content.slice(0, 50) + (content.length > 50 ? '...' : ''),
})
} else if (step.type === 'tool_call') {
items.push({
@ -97,12 +101,15 @@ const allItems = computed(() => {
const toolId = step.id_ref || step.id
const match = items.findLast(it => it.type === 'tool_call' && it.id === toolId)
if (match) {
match.resultSummary = step.content ? step.content.slice(0, 200) : ''
match.fullResult = step.content || ''
const resultContent = step.content || ''
match.resultSummary = resultContent.slice(0, 200)
match.fullResult = resultContent
match.displayResult = resultContent.length > 1024 ? resultContent.slice(0, 1024) + '\n\n[... 结果已截断 ...]' : resultContent
match.isSuccess = step.success !== false
match.loading = false
} else {
// tool_call
const placeholderContent = step.content || ''
items.push({
key: `result-${step.id || step.index}`,
type: 'tool_call',
@ -113,8 +120,9 @@ const allItems = computed(() => {
brief: step.name || '工具结果',
loading: false,
isSuccess: true,
resultSummary: step.content ? step.content.slice(0, 200) : '',
fullResult: step.content || ''
resultSummary: placeholderContent.slice(0, 200),
fullResult: placeholderContent,
displayResult: placeholderContent.length > 1024 ? placeholderContent.slice(0, 1024) + '\n\n[... 结果已截断 ...]' : placeholderContent
})
}
} else if (step.type === 'text') {
@ -280,6 +288,15 @@ const sparkleIcon = `<svg viewBox="0 0 24 24" width="14" height="14" fill="none"
color: var(--success-color);
}
.truncate-hint {
font-size: 10px;
padding: 2px 6px;
background: var(--warning-bg);
color: var(--warning-color);
border-radius: 4px;
margin-left: 4px;
}
.step-badge.error {
background: var(--danger-bg);
color: var(--danger-color);

View File

@ -41,6 +41,8 @@
/* 状态颜色 */
--success-color: #059669;
--success-bg: rgba(16, 185, 129, 0.1);
--warning-color: #d97706;
--warning-bg: rgba(217, 119, 6, 0.1);
--danger-color: #ef4444;
--danger-bg: rgba(239, 68, 68, 0.08);
@ -112,6 +114,8 @@
--success-color: #34d399;
--success-bg: rgba(52, 211, 153, 0.15);
--warning-color: #fbbf24;
--warning-bg: rgba(251, 191, 36, 0.15);
--danger-color: #f87171;
--danger-bg: rgba(248, 113, 113, 0.15);

View File

@ -52,9 +52,9 @@ const blockMathExtension = {
}
marked.use({
extensions: [blockMathExtension, mathExtension],
gfm: true,
breaks: true,
gfm: true
extensions: [blockMathExtension, mathExtension]
})
export function renderMarkdown(text) {

View File

@ -24,7 +24,7 @@
</div>
</div>
<div ref="messagesContainer" class="messages-container">
<div ref="messagesContainer" class="messages-container" @scroll="handleScroll">
<div v-if="loading" class="load-more-top">
<span>加载中...</span>
</div>
@ -106,6 +106,7 @@ const sending = ref(false)
const streamingMessage = ref(null)
const messagesContainer = ref(null)
const textareaRef = ref(null)
const autoScroll = ref(true)
const conversationId = ref(route.params.id)
const conversationTitle = ref('')
@ -128,6 +129,7 @@ function onKeydown(e) {
}
const loadMessages = async () => {
autoScroll.value = true
loading.value = true
try {
const res = await messagesAPI.list(conversationId.value)
@ -191,6 +193,7 @@ const sendMessage = async () => {
{ conversation_id: conversationId.value, content },
{
onProcessStep: (step) => {
autoScroll.value = true //
if (!streamingMessage.value) return
// id
const idx = streamingMessage.value.process_steps.findIndex(s => s.id === step.id)
@ -202,6 +205,7 @@ const sendMessage = async () => {
},
onDone: () => {
//
autoScroll.value = true
if (streamingMessage.value) {
messages.value.push({
...streamingMessage.value,
@ -230,6 +234,7 @@ const sendMessage = async () => {
}
const scrollToBottom = () => {
if (!autoScroll.value) return
nextTick(() => {
if (messagesContainer.value) {
messagesContainer.value.scrollTo({
@ -240,6 +245,15 @@ const scrollToBottom = () => {
})
}
//
const handleScroll = () => {
if (!messagesContainer.value) return
const { scrollTop, scrollHeight, clientHeight } = messagesContainer.value
const distanceToBottom = scrollHeight - scrollTop - clientHeight
// 50px
autoScroll.value = distanceToBottom < 50
}
//
watch(() => streamingMessage.value?.process_steps?.length, () => {
if (streamingMessage.value) {

View File

@ -102,6 +102,11 @@
<label>模型名称</label>
<input v-model="form.default_model" placeholder="deepseek-chat / gpt-4" required />
</div>
<div class="form-group">
<label>最大 Tokens</label>
<input v-model.number="form.max_tokens" type="number" placeholder="8192" min="1" />
<span class="hint">单次回复最大 token 默认 8192</span>
</div>
<div class="form-group">
<label class="switch-card" :class="{ active: form.is_default }">
<div class="switch-content">
@ -201,7 +206,7 @@ const testResult = ref(null)
const formError = ref('')
const form = ref({
name: '', base_url: '', api_key: '', default_model: '', is_default: false
name: '', base_url: '', api_key: '', default_model: '', max_tokens: 8192, is_default: false
})
const fetchProviders = async () => {
@ -218,7 +223,7 @@ const fetchProviders = async () => {
const closeModal = () => {
showModal.value = false
editing.value = null
form.value = { name: '', base_url: '', api_key: '', default_model: '', is_default: false }
form.value = { name: '', base_url: '', api_key: '', default_model: '', max_tokens: 8192, is_default: false }
formError.value = ''
}
@ -232,6 +237,7 @@ const editProvider = async (p) => {
base_url: res.data.base_url,
api_key: res.data.api_key || '',
default_model: res.data.default_model,
max_tokens: res.data.max_tokens || 8192,
is_default: res.data.is_default
}
}
@ -381,6 +387,7 @@ input:checked + .slider:before { transform: translateX(22px); }
.switch-card input:checked + .slider { background-color: var(--accent); }
.switch-card input:checked + .slider:before { transform: translateX(22px); }
.modal-actions { display: flex; justify-content: flex-end; gap: 1rem; margin-top: 1.5rem; }
.form-group .hint { font-size: 0.85rem; color: var(--text); margin-top: 4px; display: block; }
.spinner { width: 48px; height: 48px; border: 4px solid var(--border); border-top-color: var(--accent); border-radius: 50%; animation: spin 1s linear infinite; margin: 0 auto 1rem; }
@keyframes spin { to { transform: rotate(360deg); } }
</style>

View File

@ -7,6 +7,10 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
from luxx.database import Base
def local_now():
return datetime.now()
class LLMProvider(Base):
"""LLM Provider configuration model"""
__tablename__ = "llm_providers"
@ -18,10 +22,11 @@ class LLMProvider(Base):
base_url: Mapped[str] = mapped_column(String(500), nullable=False)
api_key: Mapped[str] = mapped_column(String(500), nullable=False)
default_model: Mapped[str] = mapped_column(String(100), nullable=False, default="gpt-4")
max_tokens: Mapped[int] = mapped_column(Integer, default=8192) # 默认 8192
is_default: Mapped[bool] = mapped_column(Boolean, default=False)
enabled: Mapped[bool] = mapped_column(Boolean, default=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=local_now, onupdate=local_now)
# Relationships
user: Mapped["User"] = relationship("User", backref="llm_providers")
@ -35,6 +40,7 @@ class LLMProvider(Base):
"provider_type": self.provider_type,
"base_url": self.base_url,
"default_model": self.default_model,
"max_tokens": self.max_tokens,
"is_default": self.is_default,
"enabled": self.enabled,
"created_at": self.created_at.isoformat() if self.created_at else None,
@ -53,8 +59,8 @@ class Project(Base):
user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), nullable=False)
name: Mapped[str] = mapped_column(String(255), nullable=False)
description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=local_now, onupdate=local_now)
# Relationships
user: Mapped["User"] = relationship("User", backref="projects")
@ -70,7 +76,7 @@ class User(Base):
password_hash: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
role: Mapped[str] = mapped_column(String(20), default="user")
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
# Relationships
conversations: Mapped[List["Conversation"]] = relationship(
@ -102,8 +108,8 @@ class Conversation(Base):
temperature: Mapped[float] = mapped_column(Float, default=0.7)
max_tokens: Mapped[int] = mapped_column(Integer, default=2000)
thinking_enabled: Mapped[bool] = mapped_column(Boolean, default=False)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
updated_at: Mapped[datetime] = mapped_column(DateTime, default=local_now, onupdate=local_now)
# Relationships
user: Mapped["User"] = relationship("User", back_populates="conversations")
@ -161,7 +167,7 @@ class Message(Base):
role: Mapped[str] = mapped_column(String(16), nullable=False) # user, assistant, system, tool
content: Mapped[str] = mapped_column(Text, nullable=False, default="")
token_count: Mapped[int] = mapped_column(Integer, default=0)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
created_at: Mapped[datetime] = mapped_column(DateTime, default=local_now)
# Relationships
conversation: Mapped["Conversation"] = relationship("Conversation", back_populates="messages")

View File

@ -79,8 +79,8 @@ def send_message(
)
db.add(user_message)
from datetime import datetime
conversation.updated_at = datetime.utcnow()
from datetime import datetime, timezone, timedelta
conversation.updated_at = datetime.now(timezone(timedelta(hours=8)))
response = chat_service.non_stream_response(
conversation=conversation,
@ -133,7 +133,7 @@ async def stream_message(
token_count=len(data.content) // 4
)
db.add(user_message)
conversation.updated_at = datetime.utcnow()
conversation.updated_at = datetime.now()
db.commit()
async def event_generator():

View File

@ -20,7 +20,8 @@ def _sse_event(event: str, data: dict) -> str:
def get_llm_client(conversation: Conversation = None):
"""Get LLM client, optionally using conversation's provider"""
"""Get LLM client, optionally using conversation's provider. Returns (client, max_tokens)"""
max_tokens = None
if conversation and conversation.provider_id:
from luxx.models import LLMProvider
from luxx.database import SessionLocal
@ -28,18 +29,19 @@ def get_llm_client(conversation: Conversation = None):
try:
provider = db.query(LLMProvider).filter(LLMProvider.id == conversation.provider_id).first()
if provider:
max_tokens = provider.max_tokens
client = LLMClient(
api_key=provider.api_key,
api_url=provider.base_url,
model=provider.default_model
)
return client
return client, max_tokens
finally:
db.close()
# Fallback to global config
client = LLMClient()
return client
return client, max_tokens
class ChatService:
@ -112,8 +114,10 @@ class ChatService:
tools = registry.list_all() if tools_enabled else None
llm = get_llm_client(conversation)
llm, provider_max_tokens = get_llm_client(conversation)
model = conversation.model or llm.default_model or "gpt-4"
    # 使用 provider 的 max_tokens；如果 conversation 有自己的 max_tokens 则覆盖
max_tokens = conversation.max_tokens if hasattr(conversation, 'max_tokens') and conversation.max_tokens else provider_max_tokens
# State tracking
all_steps = []
@ -146,7 +150,7 @@ class ChatService:
messages=messages,
tools=tools,
temperature=conversation.temperature,
max_tokens=conversation.max_tokens
max_tokens=max_tokens or 8192
):
# Parse SSE line
# Format: "event: xxx\ndata: {...}\n\n"

View File

@ -1,14 +1,11 @@
"""Web crawler tools"""
import requests
from typing import Dict, Any, List, Optional
from bs4 import BeautifulSoup
"""Crawler related tools"""
from luxx.tools.factory import tool
from luxx.tools.services import SearchService, FetchService
@tool(
name="web_search",
description="Search the internet for information using web search",
description="Search the internet for information. Use when you need to find latest news or answer questions.",
parameters={
"type": "object",
"properties": {
@ -18,7 +15,7 @@ from luxx.tools.factory import tool
},
"max_results": {
"type": "integer",
"description": "Maximum number of results to return",
"description": "Number of results to return, default 5",
"default": 5
}
},
@ -26,40 +23,25 @@ from luxx.tools.factory import tool
},
category="crawler"
)
def web_search(arguments: Dict[str, Any]) -> Dict[str, Any]:
def web_search(arguments: dict) -> dict:
"""
Execute web search
Note: This is a placeholder implementation, real usage requires integrating with actual search APIs
such as: Google Custom Search, DuckDuckGo, SerpAPI, etc.
Web search tool using DuckDuckGo
"""
query = arguments.get("query", "")
query = arguments["query"]
max_results = arguments.get("max_results", 5)
if not query:
return {"success": False, "error": "Query is required"}
service = SearchService()
results = service.search(query, max_results)
# Simulated search results
# Real implementation should integrate with actual search API
return {
"success": True,
"data": {
"query": query,
"results": [
{
"title": f"Result for '{query}' - Example {i+1}",
"url": f"https://example.com/result_{i+1}",
"snippet": f"This is a sample search result for the query '{query}'. " * 3
}
for i in range(min(max_results, 5))
]
}
}
if not results:
return {"success": True, "data": {"query": query, "results": []}, "message": "No results found"}
return {"success": True, "data": {"query": query, "results": results}}
@tool(
    name="web_fetch",
    description="Fetch content from a webpage. Use when user needs detailed information from a page.",
    parameters={
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": "URL of the webpage to fetch"
            },
            "extract_type": {
                "type": "string",
                "description": "Extraction type: text, links, or structured",
                "enum": ["text", "links", "structured"],
                "default": "text"
            }
        },
        "required": ["url"]
    },
    category="crawler"
)
def web_fetch(arguments: dict) -> dict:
    """
    Page fetch tool.

    Args:
        arguments: Tool arguments:
            - url (str, required): page to fetch
            - extract_type (str): "text" (default), "links", or "structured"

    Returns:
        {"success": True, "data": <extraction result>} on success,
        {"success": False, "error": <message>} on failure.
    """
    url = arguments["url"]
    extract_type = arguments.get("extract_type", "text")

    service = FetchService(timeout=15)
    result = service.fetch(url, extract_type)

    # FetchService reports failures in-band via an "error" key rather
    # than raising, so translate that into the tool error envelope.
    if "error" in result:
        return {"success": False, "error": result["error"]}

    return {"success": True, "data": result}
@tool(
    name="batch_fetch",
    description="Batch fetch multiple webpages. Use when you need to get content from multiple pages.",
    parameters={
        "type": "object",
        "properties": {
            "urls": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of URLs to fetch"
            },
            "extract_type": {
                "type": "string",
                "enum": ["text", "links", "structured"],
                "default": "text"
            }
        },
        "required": ["urls"]
    },
    category="crawler"
)
def batch_fetch(arguments: dict) -> dict:
    """
    Batch fetch tool.

    Args:
        arguments: Tool arguments:
            - urls (list[str], required): pages to fetch, at most 10
            - extract_type (str): "text" (default), "links", or "structured"

    Returns:
        {"success": True, "data": {"results", "total", "successful"}} on
        success; {"success": False, "error": <message>} for bad input.
    """
    urls = arguments["urls"]
    extract_type = arguments.get("extract_type", "text")

    if not urls:
        return {"success": False, "error": "URLs list is required"}

    if len(urls) > 10:
        # Hard cap so a single tool call stays bounded in time and output size.
        return {"success": False, "error": "Maximum 10 pages allowed"}

    service = FetchService(timeout=10)
    results = service.fetch_batch(urls, extract_type)

    # Per-page failures come back as dicts containing an "error" key.
    successful = sum(1 for r in results if "error" not in r)
    return {
        "success": True,
        "data": {
            "results": results,
            "total": len(results),
            "successful": successful
        }
    }

View File

@ -156,7 +156,7 @@ class ToolExecutor:
"tool_call_id": call_id,
"role": "tool",
"name": name,
"content": json.dumps(result)
"content": json.dumps(result, ensure_ascii=False)
}
def _create_error_result(self, call_id: str, name: str, error: str) -> Dict[str, Any]:
@ -165,7 +165,7 @@ class ToolExecutor:
"tool_call_id": call_id,
"role": "tool",
"name": name,
"content": json.dumps({"success": False, "error": error})
"content": json.dumps({"success": False, "error": error}, ensure_ascii=False)
}
def clear_cache(self) -> None:

247
luxx/tools/services.py Normal file
View File

@ -0,0 +1,247 @@
"""Tool helper services"""
import re
import httpx
from urllib.parse import parse_qs, urlparse
from typing import List
from concurrent.futures import ThreadPoolExecutor, as_completed
class SearchService:
    """Search service backed by DuckDuckGo's HTML endpoint (no API key)."""

    def __init__(self, engine: str = "duckduckgo"):
        # Only "duckduckgo" is implemented; kept as a parameter so other
        # engines can be plugged in later.
        self.engine = engine

    def search(
        self,
        query: str,
        max_results: int = 5,
        region: str = "cn-zh"
    ) -> List[dict]:
        """
        Execute a search.

        Args:
            query: Search keywords
            max_results: Max result count
            region: Region/locale code forwarded to the engine (e.g. "cn-zh")

        Returns:
            List of {"title", "url", "snippet"} dicts; empty list on
            network failure.

        Raises:
            ValueError: If the configured engine is not supported.
        """
        if self.engine == "duckduckgo":
            return self._search_duckduckgo(query, max_results, region)
        raise ValueError(f"Unsupported search engine: {self.engine}")

    def _search_duckduckgo(
        self,
        query: str,
        max_results: int,
        region: str
    ) -> List[dict]:
        """Scrape DuckDuckGo's HTML search page and parse result entries."""
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept": "text/html,application/xhtml+xml",
        }
        from urllib.parse import quote
        encoded_query = quote(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        # Fix: `region` was previously accepted but never used. DuckDuckGo's
        # HTML endpoint takes the region/locale via the "kl" query parameter.
        if region:
            url += f"&kl={quote(region)}"

        try:
            resp = httpx.get(url, headers=headers, timeout=15, follow_redirects=True)
            resp.raise_for_status()
        except Exception:
            # Best-effort: network errors yield an empty result list rather
            # than propagating to the tool caller.
            return []

        from bs4 import BeautifulSoup
        soup = BeautifulSoup(resp.text, "html.parser")

        results = []
        for result in soup.select(".result")[:max_results]:
            title_elem = result.select_one(".result__title a")
            snippet_elem = result.select_one(".result__snippet")
            if title_elem:
                raw_url = title_elem.get("href", "")
                # DuckDuckGo wraps result links in a redirect; the real
                # target is carried in the "uddg" query parameter.
                if "uddg=" in raw_url:
                    parsed = urlparse(raw_url)
                    params = parse_qs(parsed.query)
                    clean_url = params.get("uddg", [raw_url])[0]
                else:
                    clean_url = raw_url
                results.append({
                    "title": title_elem.get_text(strip=True),
                    "url": clean_url,
                    "snippet": snippet_elem.get_text(strip=True) if snippet_elem else ""
                })
        return results
class FetchService:
    """Page fetch service built on httpx, with optional batch concurrency."""

    def __init__(self, timeout: float = 15.0):
        # Per-request timeout in seconds.
        self.timeout = timeout
        # Browser-like UA: some sites reject default HTTP-client UAs.
        self.user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )

    def fetch(
        self,
        url: str,
        extract_type: str = "text"
    ) -> dict:
        """
        Fetch a single page.

        Args:
            url: Page URL ("https://" is prepended when the scheme is missing)
            extract_type: Extract type (text, links, structured)

        Returns:
            Extraction result dict, or {"error": ..., "url": ...} on failure.
        """
        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        try:
            resp = httpx.get(
                url,
                timeout=self.timeout,
                follow_redirects=True,
                headers={"User-Agent": self.user_agent}
            )
            resp.raise_for_status()
        except httpx.TimeoutException:
            return {"error": "Request timeout", "url": url}
        except Exception as e:
            return {"error": str(e), "url": url}

        html = resp.text
        extractor = ContentExtractor(html)

        if extract_type == "text":
            return {
                "url": url,
                "title": extractor.extract_title(),
                "text": extractor.extract_text()[:15000]  # cap payload size
            }
        elif extract_type == "links":
            return {
                "url": url,
                "links": extractor.extract_links()
            }
        else:
            return extractor.extract_structured(url)

    def fetch_batch(
        self,
        urls: List[str],
        extract_type: str = "text",
        max_concurrent: int = 5
    ) -> List[dict]:
        """
        Batch fetch pages concurrently.

        Args:
            urls: URL list
            extract_type: Extract type
            max_concurrent: Max concurrent requests (clamped to 1-5, default 5)

        Returns:
            Result list (same order as input URLs)
        """
        # Zero or one URL needs no thread pool.
        if len(urls) <= 1:
            return [self.fetch(url, extract_type) for url in urls]

        max_concurrent = min(max(max_concurrent, 1), 5)
        results = [None] * len(urls)

        with ThreadPoolExecutor(max_workers=max_concurrent) as pool:
            futures = {
                pool.submit(self.fetch, url, extract_type): i
                for i, url in enumerate(urls)
            }
            for future in as_completed(futures):
                idx = futures[future]
                try:
                    results[idx] = future.result()
                except Exception as e:
                    # Fix: include the URL so unexpected-failure entries match
                    # the {"error", "url"} shape fetch() returns.
                    results[idx] = {"error": str(e), "url": urls[idx]}
        return results
class ContentExtractor:
    """HTML content extractor; the BeautifulSoup parse happens lazily."""

    def __init__(self, html: str):
        self.html = html
        self._soup = None  # populated on first access of `soup`

    @property
    def soup(self):
        # Defer both the bs4 import and the parse until actually needed.
        if self._soup is None:
            from bs4 import BeautifulSoup
            self._soup = BeautifulSoup(self.html, "html.parser")
        return self._soup

    def extract_title(self) -> str:
        """Return the page <title> text, or "" when absent."""
        title_tag = self.soup.title
        if not title_tag:
            return ""
        return title_tag.string or ""

    def extract_text(self) -> str:
        """Return the page's plain text with boilerplate elements removed."""
        # Strip non-content elements before flattening to text.
        for noise in self.soup(["script", "style", "nav", "footer", "header", "aside"]):
            noise.decompose()
        flattened = self.soup.get_text(separator="\n", strip=True)
        # Collapse runs of 3+ newlines down to a single blank line.
        return re.sub(r"\n{3,}", "\n\n", flattened)

    def extract_links(self, max_count: int = 50) -> List[dict]:
        """Return up to max_count {"text", "url"} anchors, skipping non-navigational hrefs."""
        collected = []
        for anchor in self.soup.find_all("a", href=True):
            label = anchor.get_text(strip=True)
            target = anchor["href"]
            if not label or not target:
                continue
            if target.startswith(("#", "javascript:", "mailto:", "tel:")):
                continue
            collected.append({"text": label, "url": target})
            if len(collected) >= max_count:
                break
        return collected

    def extract_structured(self, url: str = "") -> dict:
        """Return title, meta description, truncated text, and top links."""
        doc = self.soup

        page_title = ""
        if doc.title:
            page_title = doc.title.string or ""

        meta_desc = doc.find("meta", attrs={"name": "description"})
        description = meta_desc.get("content", "") if meta_desc else ""

        return {
            "url": url,
            "title": page_title.strip(),
            "description": description.strip(),
            "text": self.extract_text()[:5000],
            "links": self.extract_links(20)
        }

View File

@ -2,7 +2,6 @@
name = "luxx"
version = "1.0.0"
description = "luxx - FastAPI + SQLAlchemy"
readme = "docs/README.md"
requires-python = ">=3.10"
dependencies = [
@ -19,6 +18,7 @@ dependencies = [
"requests>=2.31.0",
"beautifulsoup4>=4.12.3",
"lxml>=5.1.0",
"httpx>=0.26.0",
"pyyaml>=6.0.1",
"shortuuid>=1.0.11",
"pydantic>=2.5.0",
@ -34,3 +34,6 @@ dev = [
"black>=24.0.0",
"ruff>=0.1.0",
]
[tool.setuptools]
packages = ["luxx"]

View File