refactor: 精简项目代码
This commit is contained in:
parent
a84b8617a6
commit
f10c5de950
|
|
@ -1,6 +1,6 @@
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref } from 'vue'
|
import { ref } from 'vue'
|
||||||
import { useAuth } from './composables/useAuth.js'
|
import { useAuth } from './utils/useAuth.js'
|
||||||
import AppSidebar from './components/AppSidebar.vue'
|
import AppSidebar from './components/AppSidebar.vue'
|
||||||
|
|
||||||
const { isLoggedIn } = useAuth()
|
const { isLoggedIn } = useAuth()
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@
|
||||||
<span class="step-label">思考中</span>
|
<span class="step-label">思考中</span>
|
||||||
<span class="step-brief">{{ item.brief || '正在思考...' }}</span>
|
<span class="step-brief">{{ item.brief || '正在思考...' }}</span>
|
||||||
<span v-if="streaming && item.key === lastThinkingKey" class="loading-dots">...</span>
|
<span v-if="streaming && item.key === lastThinkingKey" class="loading-dots">...</span>
|
||||||
<span v-else-if="item.content && item.content.length > 1024" class="truncate-hint">已截断</span>
|
|
||||||
<span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
|
<span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
|
||||||
</div>
|
</div>
|
||||||
<div v-if="expandedKeys.has(item.key)" class="step-content">
|
<div v-if="expandedKeys.has(item.key)" class="step-content">
|
||||||
|
|
@ -26,7 +25,6 @@
|
||||||
<span v-if="item.loading" class="loading-dots">...</span>
|
<span v-if="item.loading" class="loading-dots">...</span>
|
||||||
<span v-else-if="item.isSuccess === true" class="step-badge success">成功</span>
|
<span v-else-if="item.isSuccess === true" class="step-badge success">成功</span>
|
||||||
<span v-else-if="item.isSuccess === false" class="step-badge error">失败</span>
|
<span v-else-if="item.isSuccess === false" class="step-badge error">失败</span>
|
||||||
<span v-if="item.fullResult && item.fullResult.length > 1024" class="truncate-hint">已截断</span>
|
|
||||||
<span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
|
<span class="arrow" :class="{ open: expandedKeys.has(item.key) }" v-html="chevronDown"></span>
|
||||||
</div>
|
</div>
|
||||||
<div v-if="expandedKeys.has(item.key)" class="step-content">
|
<div v-if="expandedKeys.has(item.key)" class="step-content">
|
||||||
|
|
@ -79,7 +77,7 @@ const allItems = computed(() => {
|
||||||
type: 'thinking',
|
type: 'thinking',
|
||||||
index: step.index,
|
index: step.index,
|
||||||
content: content,
|
content: content,
|
||||||
displayContent: content.length > 1024 ? content.slice(0, 1024) + '\n\n[... 内容已截断 ...]' : content,
|
displayContent: content.length > 1024 ? content.slice(0, 1024) + '...' : content,
|
||||||
brief: content.slice(0, 50) + (content.length > 50 ? '...' : ''),
|
brief: content.slice(0, 50) + (content.length > 50 ? '...' : ''),
|
||||||
})
|
})
|
||||||
} else if (step.type === 'tool_call') {
|
} else if (step.type === 'tool_call') {
|
||||||
|
|
@ -104,7 +102,7 @@ const allItems = computed(() => {
|
||||||
const resultContent = step.content || ''
|
const resultContent = step.content || ''
|
||||||
match.resultSummary = resultContent.slice(0, 200)
|
match.resultSummary = resultContent.slice(0, 200)
|
||||||
match.fullResult = resultContent
|
match.fullResult = resultContent
|
||||||
match.displayResult = resultContent.length > 1024 ? resultContent.slice(0, 1024) + '\n\n[... 结果已截断 ...]' : resultContent
|
match.displayResult = resultContent.length > 1024 ? resultContent.slice(0, 1024) + '...' : resultContent
|
||||||
match.isSuccess = step.success !== false
|
match.isSuccess = step.success !== false
|
||||||
match.loading = false
|
match.loading = false
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -122,7 +120,7 @@ const allItems = computed(() => {
|
||||||
isSuccess: true,
|
isSuccess: true,
|
||||||
resultSummary: placeholderContent.slice(0, 200),
|
resultSummary: placeholderContent.slice(0, 200),
|
||||||
fullResult: placeholderContent,
|
fullResult: placeholderContent,
|
||||||
displayResult: placeholderContent.length > 1024 ? placeholderContent.slice(0, 1024) + '\n\n[... 结果已截断 ...]' : placeholderContent
|
displayResult: placeholderContent.length > 1024 ? placeholderContent.slice(0, 1024) + '...' : placeholderContent
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
} else if (step.type === 'text') {
|
} else if (step.type === 'text') {
|
||||||
|
|
@ -288,15 +286,6 @@ const sparkleIcon = `<svg viewBox="0 0 24 24" width="14" height="14" fill="none"
|
||||||
color: var(--success-color);
|
color: var(--success-color);
|
||||||
}
|
}
|
||||||
|
|
||||||
.truncate-hint {
|
|
||||||
font-size: 10px;
|
|
||||||
padding: 2px 6px;
|
|
||||||
background: var(--warning-bg);
|
|
||||||
color: var(--warning-color);
|
|
||||||
border-radius: 4px;
|
|
||||||
margin-left: 4px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.step-badge.error {
|
.step-badge.error {
|
||||||
background: var(--danger-bg);
|
background: var(--danger-bg);
|
||||||
color: var(--danger-color);
|
color: var(--danger-color);
|
||||||
|
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
// 导出所有组合式函数
|
|
||||||
export { useAuth } from './useAuth.js'
|
|
||||||
export { useApi, usePagination, useForm } from './useApi.js'
|
|
||||||
export { formatDate, formatNumber, truncate } from './useFormatters.js'
|
|
||||||
export { debounce, throttle, storage, copyToClipboard } from './useUtils.js'
|
|
||||||
|
|
@ -2,7 +2,7 @@ import { createApp } from 'vue'
|
||||||
import './style.css'
|
import './style.css'
|
||||||
import App from './App.vue'
|
import App from './App.vue'
|
||||||
import router from './router'
|
import router from './router'
|
||||||
import pinia from './stores'
|
import { pinia } from './utils'
|
||||||
|
|
||||||
createApp(App)
|
createApp(App)
|
||||||
.use(router)
|
.use(router)
|
||||||
|
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
||||||
import { defineStore } from 'pinia'
|
|
||||||
import api from '../services/api'
|
|
||||||
|
|
||||||
export const useAuthStore = defineStore('auth', {
|
|
||||||
state: () => ({
|
|
||||||
user: null,
|
|
||||||
token: localStorage.getItem('access_token') || null,
|
|
||||||
isAuthenticated: !!localStorage.getItem('access_token')
|
|
||||||
}),
|
|
||||||
|
|
||||||
actions: {
|
|
||||||
async login(credentials) {
|
|
||||||
try {
|
|
||||||
const response = await api.post('/auth/login', credentials)
|
|
||||||
this.token = response.data.access_token
|
|
||||||
this.user = response.data.user
|
|
||||||
this.isAuthenticated = true
|
|
||||||
localStorage.setItem('access_token', this.token)
|
|
||||||
return { success: true }
|
|
||||||
} catch (error) {
|
|
||||||
return { success: false, error: error.message }
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
async register(userData) {
|
|
||||||
try {
|
|
||||||
const response = await api.post('/auth/register', userData)
|
|
||||||
// 注册后自动登录
|
|
||||||
return this.login({ username: userData.username, password: userData.password })
|
|
||||||
} catch (error) {
|
|
||||||
return { success: false, error: error.message }
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
async logout() {
|
|
||||||
try {
|
|
||||||
await api.post('/auth/logout')
|
|
||||||
} catch (error) {
|
|
||||||
// 忽略错误
|
|
||||||
}
|
|
||||||
this.token = null
|
|
||||||
this.user = null
|
|
||||||
this.isAuthenticated = false
|
|
||||||
localStorage.removeItem('access_token')
|
|
||||||
},
|
|
||||||
|
|
||||||
async fetchUser() {
|
|
||||||
try {
|
|
||||||
const response = await api.get('/auth/me')
|
|
||||||
this.user = response.data
|
|
||||||
return { success: true }
|
|
||||||
} catch (error) {
|
|
||||||
this.token = null
|
|
||||||
this.isAuthenticated = false
|
|
||||||
localStorage.removeItem('access_token')
|
|
||||||
return { success: false }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
/**
|
||||||
|
* Luxx 前端工具库
|
||||||
|
* 合并了 composables、services 和 stores 的统一导出
|
||||||
|
*/
|
||||||
|
|
||||||
|
// ============ API 服务 ============
|
||||||
|
export { default as api, authAPI, conversationsAPI, messagesAPI, toolsAPI, providersAPI, createSSEStream } from './api.js'
|
||||||
|
|
||||||
|
// ============ Pinia 状态管理 ============
|
||||||
|
export { default as pinia } from './store.js'
|
||||||
|
|
||||||
|
// ============ 认证相关 ============
|
||||||
|
export { useAuth } from './useAuth.js'
|
||||||
|
|
||||||
|
// ============ API 请求组合式函数 ============
|
||||||
|
export { useApi, usePagination, useForm } from './useApi.js'
|
||||||
|
|
||||||
|
// ============ 格式化工具 ============
|
||||||
|
export { formatDate, formatNumber, truncate, formatFileSize, capitalize, formatTokens } from './useFormatters.js'
|
||||||
|
|
||||||
|
// ============ 通用工具函数 ============
|
||||||
|
export { debounce, throttle, deepClone, generateId, storage, getDeviceType, copyToClipboard } from './useUtils.js'
|
||||||
|
|
||||||
|
// ============ Markdown 渲染 ============
|
||||||
|
export { renderMarkdown } from './markdown.js'
|
||||||
|
|
@ -3,6 +3,3 @@ import { createPinia } from 'pinia'
|
||||||
const pinia = createPinia()
|
const pinia = createPinia()
|
||||||
|
|
||||||
export default pinia
|
export default pinia
|
||||||
|
|
||||||
// 方便导入 store
|
|
||||||
export * from './auth'
|
|
||||||
|
|
@ -51,7 +51,7 @@ export function deepClone(obj) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 生成随机 ID
|
* 生成随机 Id
|
||||||
* @param {number} length - 长度
|
* @param {number} length - 长度
|
||||||
*/
|
*/
|
||||||
export function generateId(length = 8) {
|
export function generateId(length = 8) {
|
||||||
|
|
@ -48,8 +48,8 @@
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref, reactive } from 'vue'
|
import { ref, reactive } from 'vue'
|
||||||
import { useRouter } from 'vue-router'
|
import { useRouter } from 'vue-router'
|
||||||
import { authAPI } from '../services/api.js'
|
import { authAPI } from '../utils/api.js'
|
||||||
import { useAuth } from '../composables/useAuth.js'
|
import { useAuth } from '../utils/useAuth.js'
|
||||||
|
|
||||||
const router = useRouter()
|
const router = useRouter()
|
||||||
const { login } = useAuth()
|
const { login } = useAuth()
|
||||||
|
|
|
||||||
|
|
@ -93,7 +93,7 @@
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref, computed, onMounted, nextTick, watch } from 'vue'
|
import { ref, computed, onMounted, nextTick, watch } from 'vue'
|
||||||
import { useRoute } from 'vue-router'
|
import { useRoute } from 'vue-router'
|
||||||
import { conversationsAPI, messagesAPI } from '../services/api.js'
|
import { conversationsAPI, messagesAPI } from '../utils/api.js'
|
||||||
import ProcessBlock from '../components/ProcessBlock.vue'
|
import ProcessBlock from '../components/ProcessBlock.vue'
|
||||||
import MessageBubble from '../components/MessageBubble.vue'
|
import MessageBubble from '../components/MessageBubble.vue'
|
||||||
import { renderMarkdown } from '../utils/markdown.js'
|
import { renderMarkdown } from '../utils/markdown.js'
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref, computed, onMounted } from 'vue'
|
import { ref, computed, onMounted } from 'vue'
|
||||||
import { useRouter } from 'vue-router'
|
import { useRouter } from 'vue-router'
|
||||||
import { conversationsAPI, providersAPI } from '../services/api.js'
|
import { conversationsAPI, providersAPI } from '../utils/api.js'
|
||||||
|
|
||||||
const router = useRouter()
|
const router = useRouter()
|
||||||
const list = ref([])
|
const list = ref([])
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref, onMounted } from 'vue'
|
import { ref, onMounted } from 'vue'
|
||||||
import { conversationsAPI, toolsAPI } from '../services/api.js'
|
import { conversationsAPI, toolsAPI } from '../utils/api.js'
|
||||||
|
|
||||||
const stats = ref({ conversations: 0, tools: 0, messages: 0, models: 1 })
|
const stats = ref({ conversations: 0, tools: 0, messages: 0, models: 1 })
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -133,9 +133,9 @@
|
||||||
|
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref, onMounted } from 'vue'
|
import { ref, onMounted } from 'vue'
|
||||||
import { providersAPI } from '../services/api.js'
|
import { providersAPI } from '../utils/api.js'
|
||||||
import { useAuth } from '../composables/useAuth.js'
|
import { useAuth } from '../utils/useAuth.js'
|
||||||
import { authAPI } from '../services/api.js'
|
import { authAPI } from '../utils/api.js'
|
||||||
import { useRouter } from 'vue-router'
|
import { useRouter } from 'vue-router'
|
||||||
|
|
||||||
const router = useRouter()
|
const router = useRouter()
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@
|
||||||
|
|
||||||
<script setup>
|
<script setup>
|
||||||
import { ref, onMounted } from 'vue'
|
import { ref, onMounted } from 'vue'
|
||||||
import { toolsAPI } from '../services/api.js'
|
import { toolsAPI } from '../utils/api.js'
|
||||||
|
|
||||||
const list = ref([])
|
const list = ref([])
|
||||||
const loading = ref(true)
|
const loading = ref(true)
|
||||||
|
|
|
||||||
|
|
@ -1,128 +1,49 @@
|
||||||
"""Crawler related tools"""
|
"""Crawler tools"""
|
||||||
from luxx.tools.factory import tool
|
from luxx.tools.factory import tool
|
||||||
from luxx.tools.services import SearchService, FetchService
|
from luxx.tools.services import SearchService, FetchService
|
||||||
|
|
||||||
|
|
||||||
@tool(
|
@tool(name="web_search", description="Search the internet. Use when you need to find latest news or answer questions.", parameters={
|
||||||
name="web_search",
|
"type": "object",
|
||||||
description="Search the internet for information. Use when you need to find latest news or answer questions.",
|
"properties": {
|
||||||
parameters={
|
"query": {"type": "string", "description": "Search keywords"},
|
||||||
"type": "object",
|
"max_results": {"type": "integer", "description": "Number of results, default 5", "default": 5}
|
||||||
"properties": {
|
|
||||||
"query": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Search keywords"
|
|
||||||
},
|
|
||||||
"max_results": {
|
|
||||||
"type": "integer",
|
|
||||||
"description": "Number of results to return, default 5",
|
|
||||||
"default": 5
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["query"]
|
|
||||||
},
|
},
|
||||||
category="crawler"
|
"required": ["query"]
|
||||||
)
|
}, category="crawler")
|
||||||
def web_search(arguments: dict) -> dict:
|
def web_search(arguments: dict) -> dict:
|
||||||
"""
|
results = SearchService().search(arguments["query"], arguments.get("max_results", 5))
|
||||||
Web search tool using DuckDuckGo
|
return {"success": True, "data": {"query": arguments["query"], "results": results or []}}
|
||||||
"""
|
|
||||||
query = arguments["query"]
|
|
||||||
max_results = arguments.get("max_results", 5)
|
|
||||||
|
|
||||||
service = SearchService()
|
|
||||||
results = service.search(query, max_results)
|
|
||||||
|
|
||||||
if not results:
|
|
||||||
return {"success": True, "data": {"query": query, "results": []}, "message": "No results found"}
|
|
||||||
|
|
||||||
return {"success": True, "data": {"query": query, "results": results}}
|
|
||||||
|
|
||||||
|
|
||||||
@tool(
|
@tool(name="web_fetch", description="Fetch content from a webpage.", parameters={
|
||||||
name="web_fetch",
|
"type": "object",
|
||||||
description="Fetch content from a webpage. Use when user needs detailed information from a page.",
|
"properties": {
|
||||||
parameters={
|
"url": {"type": "string", "description": "URL to fetch"},
|
||||||
"type": "object",
|
"extract_type": {"type": "string", "enum": ["text", "links", "structured"], "default": "text"}
|
||||||
"properties": {
|
|
||||||
"url": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "URL of the webpage to fetch"
|
|
||||||
},
|
|
||||||
"extract_type": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Extraction type: text, links, or structured",
|
|
||||||
"enum": ["text", "links", "structured"],
|
|
||||||
"default": "text"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["url"]
|
|
||||||
},
|
},
|
||||||
category="crawler"
|
"required": ["url"]
|
||||||
)
|
}, category="crawler")
|
||||||
def web_fetch(arguments: dict) -> dict:
|
def web_fetch(arguments: dict) -> dict:
|
||||||
"""
|
if not arguments.get("url"):
|
||||||
Page fetch tool
|
|
||||||
"""
|
|
||||||
url = arguments["url"]
|
|
||||||
extract_type = arguments.get("extract_type", "text")
|
|
||||||
|
|
||||||
if not url:
|
|
||||||
return {"success": False, "error": "URL is required"}
|
return {"success": False, "error": "URL is required"}
|
||||||
|
result = FetchService().fetch(arguments["url"], arguments.get("extract_type", "text"))
|
||||||
service = FetchService(timeout=15)
|
return {"success": "error" not in result, "data": result, "error": result.get("error")}
|
||||||
result = service.fetch(url, extract_type)
|
|
||||||
|
|
||||||
if "error" in result:
|
|
||||||
return {"success": False, "error": result["error"]}
|
|
||||||
|
|
||||||
return {"success": True, "data": result}
|
|
||||||
|
|
||||||
|
|
||||||
@tool(
|
@tool(name="batch_fetch", description="Batch fetch multiple webpages.", parameters={
|
||||||
name="batch_fetch",
|
"type": "object",
|
||||||
description="Batch fetch multiple webpages. Use when you need to get content from multiple pages.",
|
"properties": {
|
||||||
parameters={
|
"urls": {"type": "array", "items": {"type": "string"}, "description": "URLs to fetch"},
|
||||||
"type": "object",
|
"extract_type": {"type": "string", "enum": ["text", "links", "structured"], "default": "text"}
|
||||||
"properties": {
|
|
||||||
"urls": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {"type": "string"},
|
|
||||||
"description": "List of URLs to fetch"
|
|
||||||
},
|
|
||||||
"extract_type": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": ["text", "links", "structured"],
|
|
||||||
"default": "text"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["urls"]
|
|
||||||
},
|
},
|
||||||
category="crawler"
|
"required": ["urls"]
|
||||||
)
|
}, category="crawler")
|
||||||
def batch_fetch(arguments: dict) -> dict:
|
def batch_fetch(arguments: dict) -> dict:
|
||||||
"""
|
urls = arguments.get("urls", [])
|
||||||
Batch fetch tool
|
|
||||||
"""
|
|
||||||
urls = arguments["urls"]
|
|
||||||
extract_type = arguments.get("extract_type", "text")
|
|
||||||
|
|
||||||
if not urls:
|
if not urls:
|
||||||
return {"success": False, "error": "URLs list is required"}
|
return {"success": False, "error": "URLs list is required"}
|
||||||
|
|
||||||
if len(urls) > 10:
|
if len(urls) > 10:
|
||||||
return {"success": False, "error": "Maximum 10 pages allowed"}
|
return {"success": False, "error": "Maximum 10 pages allowed"}
|
||||||
|
results = FetchService().fetch_batch(urls, arguments.get("extract_type", "text"))
|
||||||
service = FetchService(timeout=10)
|
return {"success": True, "data": {"results": results, "total": len(results)}}
|
||||||
results = service.fetch_batch(urls, extract_type)
|
|
||||||
|
|
||||||
successful = sum(1 for r in results if "error" not in r)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"success": True,
|
|
||||||
"data": {
|
|
||||||
"results": results,
|
|
||||||
"total": len(results),
|
|
||||||
"successful": successful
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -1,54 +1,18 @@
|
||||||
"""Tool helper services"""
|
"""Tool helper services"""
|
||||||
import re
|
import re
|
||||||
import httpx
|
import httpx
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse, quote
|
||||||
from typing import List
|
from typing import List
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
class SearchService:
|
class SearchService:
|
||||||
"""Search service using DuckDuckGo"""
|
"""DuckDuckGo search"""
|
||||||
|
|
||||||
def __init__(self, engine: str = "duckduckgo"):
|
def search(self, query: str, max_results: int = 5) -> List[dict]:
|
||||||
self.engine = engine
|
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
|
||||||
|
url = f"https://html.duckduckgo.com/html/?q={quote(query)}"
|
||||||
def search(
|
|
||||||
self,
|
|
||||||
query: str,
|
|
||||||
max_results: int = 5,
|
|
||||||
region: str = "cn-zh"
|
|
||||||
) -> List[dict]:
|
|
||||||
"""
|
|
||||||
Execute search
|
|
||||||
|
|
||||||
Args:
|
|
||||||
query: Search keywords
|
|
||||||
max_results: Max result count
|
|
||||||
region: Region setting
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Search result list
|
|
||||||
"""
|
|
||||||
if self.engine == "duckduckgo":
|
|
||||||
return self._search_duckduckgo(query, max_results, region)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported search engine: {self.engine}")
|
|
||||||
|
|
||||||
def _search_duckduckgo(
|
|
||||||
self,
|
|
||||||
query: str,
|
|
||||||
max_results: int,
|
|
||||||
region: str
|
|
||||||
) -> List[dict]:
|
|
||||||
"""DuckDuckGo search via HTML"""
|
|
||||||
headers = {
|
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
|
|
||||||
"Accept": "text/html,application/xhtml+xml",
|
|
||||||
}
|
|
||||||
|
|
||||||
from urllib.parse import quote
|
|
||||||
encoded_query = quote(query)
|
|
||||||
url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = httpx.get(url, headers=headers, timeout=15, follow_redirects=True)
|
resp = httpx.get(url, headers=headers, timeout=15, follow_redirects=True)
|
||||||
|
|
@ -56,7 +20,6 @@ class SearchService:
|
||||||
except Exception:
|
except Exception:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
soup = BeautifulSoup(resp.text, "html.parser")
|
soup = BeautifulSoup(resp.text, "html.parser")
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
|
@ -66,10 +29,8 @@ class SearchService:
|
||||||
|
|
||||||
if title_elem:
|
if title_elem:
|
||||||
raw_url = title_elem.get("href", "")
|
raw_url = title_elem.get("href", "")
|
||||||
# Clean DuckDuckGo redirect URL
|
|
||||||
if "uddg=" in raw_url:
|
if "uddg=" in raw_url:
|
||||||
parsed = urlparse(raw_url)
|
params = parse_qs(urlparse(raw_url).query)
|
||||||
params = parse_qs(parsed.query)
|
|
||||||
clean_url = params.get("uddg", [raw_url])[0]
|
clean_url = params.get("uddg", [raw_url])[0]
|
||||||
else:
|
else:
|
||||||
clean_url = raw_url
|
clean_url = raw_url
|
||||||
|
|
@ -84,164 +45,55 @@ class SearchService:
|
||||||
|
|
||||||
|
|
||||||
class FetchService:
|
class FetchService:
|
||||||
"""Page fetch service"""
|
"""Page fetch with concurrent support"""
|
||||||
|
|
||||||
def __init__(self, timeout: float = 15.0):
|
def __init__(self, timeout: float = 15.0):
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.user_agent = (
|
self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
||||||
"Chrome/120.0.0.0 Safari/537.36"
|
|
||||||
)
|
|
||||||
|
|
||||||
def fetch(
|
def fetch(self, url: str, extract_type: str = "text") -> dict:
|
||||||
self,
|
|
||||||
url: str,
|
|
||||||
extract_type: str = "text"
|
|
||||||
) -> dict:
|
|
||||||
"""
|
|
||||||
Fetch a single page
|
|
||||||
|
|
||||||
Args:
|
|
||||||
url: Page URL
|
|
||||||
extract_type: Extract type (text, links, structured)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Fetch result
|
|
||||||
"""
|
|
||||||
if not url.startswith(("http://", "https://")):
|
if not url.startswith(("http://", "https://")):
|
||||||
url = "https://" + url
|
url = "https://" + url
|
||||||
|
|
||||||
try:
|
try:
|
||||||
resp = httpx.get(
|
resp = httpx.get(url, timeout=self.timeout, follow_redirects=True, headers={"User-Agent": self.user_agent})
|
||||||
url,
|
|
||||||
timeout=self.timeout,
|
|
||||||
follow_redirects=True,
|
|
||||||
headers={"User-Agent": self.user_agent}
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
except httpx.TimeoutException:
|
except httpx.TimeoutException:
|
||||||
return {"error": "Request timeout", "url": url}
|
return {"error": "Request timeout"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"error": str(e), "url": url}
|
return {"error": str(e)}
|
||||||
|
|
||||||
html = resp.text
|
soup = BeautifulSoup(resp.text, "html.parser")
|
||||||
extractor = ContentExtractor(html)
|
title = soup.title.string if soup.title else ""
|
||||||
|
|
||||||
if extract_type == "text":
|
# Remove noise
|
||||||
return {
|
for tag in soup(["script", "style", "nav", "footer", "header", "aside"]):
|
||||||
"url": url,
|
tag.decompose()
|
||||||
"title": extractor.extract_title(),
|
|
||||||
"text": extractor.extract_text()[:15000]
|
|
||||||
}
|
|
||||||
elif extract_type == "links":
|
|
||||||
return {
|
|
||||||
"url": url,
|
|
||||||
"links": extractor.extract_links()
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
return extractor.extract_structured(url)
|
|
||||||
|
|
||||||
def fetch_batch(
|
if extract_type == "links":
|
||||||
self,
|
links = [{"text": a.get_text(strip=True), "url": a["href"]}
|
||||||
urls: List[str],
|
for a in soup.find_all("a", href=True)
|
||||||
extract_type: str = "text",
|
if a.get_text(strip=True) and not a["href"].startswith(("#", "javascript:"))]
|
||||||
max_concurrent: int = 5
|
return {"url": url, "links": links[:50]}
|
||||||
) -> List[dict]:
|
|
||||||
"""
|
|
||||||
Batch fetch pages concurrently.
|
|
||||||
|
|
||||||
Args:
|
text = re.sub(r"\n{3,}", "\n\n", soup.get_text(separator="\n", strip=True))
|
||||||
urls: URL list
|
|
||||||
extract_type: Extract type
|
|
||||||
max_concurrent: Max concurrent requests (1-5, default 5)
|
|
||||||
|
|
||||||
Returns:
|
if extract_type == "structured":
|
||||||
Result list (same order as input URLs)
|
meta_desc = soup.find("meta", attrs={"name": "description"})
|
||||||
"""
|
return {"url": url, "title": title, "description": (meta_desc.get("content", "") if meta_desc else ""), "text": text[:5000]}
|
||||||
|
|
||||||
|
return {"url": url, "title": title, "text": text[:15000]}
|
||||||
|
|
||||||
|
def fetch_batch(self, urls: List[str], extract_type: str = "text", max_concurrent: int = 5) -> List[dict]:
|
||||||
if len(urls) <= 1:
|
if len(urls) <= 1:
|
||||||
return [self.fetch(url, extract_type) for url in urls]
|
return [self.fetch(url, extract_type) for url in urls]
|
||||||
|
|
||||||
max_concurrent = min(max(max_concurrent, 1), 5)
|
|
||||||
results = [None] * len(urls)
|
results = [None] * len(urls)
|
||||||
|
max_concurrent = min(max(max_concurrent, 1), 5)
|
||||||
|
|
||||||
with ThreadPoolExecutor(max_workers=max_concurrent) as pool:
|
with ThreadPoolExecutor(max_workers=max_concurrent) as pool:
|
||||||
futures = {
|
futures = {pool.submit(self.fetch, url, extract_type): i for i, url in enumerate(urls)}
|
||||||
pool.submit(self.fetch, url, extract_type): i
|
|
||||||
for i, url in enumerate(urls)
|
|
||||||
}
|
|
||||||
for future in as_completed(futures):
|
for future in as_completed(futures):
|
||||||
idx = futures[future]
|
results[futures[future]] = future.result()
|
||||||
try:
|
|
||||||
results[idx] = future.result()
|
|
||||||
except Exception as e:
|
|
||||||
results[idx] = {"error": str(e)}
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
class ContentExtractor:
|
|
||||||
"""Content extractor using BeautifulSoup"""
|
|
||||||
|
|
||||||
def __init__(self, html: str):
|
|
||||||
self.html = html
|
|
||||||
self._soup = None
|
|
||||||
|
|
||||||
@property
|
|
||||||
def soup(self):
|
|
||||||
if self._soup is None:
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
self._soup = BeautifulSoup(self.html, "html.parser")
|
|
||||||
return self._soup
|
|
||||||
|
|
||||||
def extract_title(self) -> str:
|
|
||||||
"""Extract page title"""
|
|
||||||
if self.soup.title:
|
|
||||||
return self.soup.title.string or ""
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def extract_text(self) -> str:
|
|
||||||
"""Extract plain text"""
|
|
||||||
# Remove script and style
|
|
||||||
for tag in self.soup(["script", "style", "nav", "footer", "header", "aside"]):
|
|
||||||
tag.decompose()
|
|
||||||
|
|
||||||
text = self.soup.get_text(separator="\n", strip=True)
|
|
||||||
# Clean extra whitespace
|
|
||||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
|
||||||
return text
|
|
||||||
|
|
||||||
def extract_links(self, max_count: int = 50) -> List[dict]:
|
|
||||||
"""Extract links"""
|
|
||||||
links = []
|
|
||||||
for a in self.soup.find_all("a", href=True):
|
|
||||||
text = a.get_text(strip=True)
|
|
||||||
href = a["href"]
|
|
||||||
if text and href and not href.startswith(("#", "javascript:", "mailto:", "tel:")):
|
|
||||||
links.append({"text": text, "url": href})
|
|
||||||
if len(links) >= max_count:
|
|
||||||
break
|
|
||||||
return links
|
|
||||||
|
|
||||||
def extract_structured(self, url: str = "") -> dict:
|
|
||||||
"""Extract structured content"""
|
|
||||||
soup = self.soup
|
|
||||||
|
|
||||||
# Extract title
|
|
||||||
title = ""
|
|
||||||
if soup.title:
|
|
||||||
title = soup.title.string or ""
|
|
||||||
|
|
||||||
# Extract meta description
|
|
||||||
description = ""
|
|
||||||
meta_desc = soup.find("meta", attrs={"name": "description"})
|
|
||||||
if meta_desc:
|
|
||||||
description = meta_desc.get("content", "")
|
|
||||||
|
|
||||||
return {
|
|
||||||
"url": url,
|
|
||||||
"title": title.strip(),
|
|
||||||
"description": description.strip(),
|
|
||||||
"text": self.extract_text()[:5000],
|
|
||||||
"links": self.extract_links(20)
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -7,8 +7,6 @@ requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"fastapi>=0.109.0",
|
"fastapi>=0.109.0",
|
||||||
"uvicorn[standard]>=0.27.0",
|
"uvicorn[standard]>=0.27.0",
|
||||||
"python-multipart>=0.0.6",
|
|
||||||
"sse-starlette>=2.0.0",
|
|
||||||
"sqlalchemy>=2.0.25",
|
"sqlalchemy>=2.0.25",
|
||||||
"aiosqlite>=0.19.0",
|
"aiosqlite>=0.19.0",
|
||||||
"pyjwt>=2.8.0",
|
"pyjwt>=2.8.0",
|
||||||
|
|
@ -18,22 +16,16 @@ dependencies = [
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"beautifulsoup4>=4.12.3",
|
"beautifulsoup4>=4.12.3",
|
||||||
"lxml>=5.1.0",
|
"lxml>=5.1.0",
|
||||||
"httpx>=0.26.0",
|
|
||||||
"pyyaml>=6.0.1",
|
"pyyaml>=6.0.1",
|
||||||
"shortuuid>=1.0.11",
|
|
||||||
"pydantic>=2.5.0",
|
"pydantic>=2.5.0",
|
||||||
"pydantic-settings>=2.1.0",
|
"pydantic-settings>=2.1.0",
|
||||||
"email-validator>=2.1.0",
|
"email-validator>=2.1.0",
|
||||||
|
"shortuuid>=1.0.11",
|
||||||
|
"sse-starlette>=2.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
dev = [
|
dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0", "pytest-cov>=4.1.0", "black>=24.0.0", "ruff>=0.1.0"]
|
||||||
"pytest>=8.0.0",
|
|
||||||
"pytest-asyncio>=0.23.0",
|
|
||||||
"pytest-cov>=4.1.0",
|
|
||||||
"black>=24.0.0",
|
|
||||||
"ruff>=0.1.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
packages = ["luxx"]
|
packages = ["luxx"]
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue