# nanoClaw/backend/services/llm_client.py
#
# 140 lines
# 4.3 KiB
# Python

"""OpenAI-compatible LLM API client
Supports any provider that follows the OpenAI chat completions API format:
- Zhipu GLM (open.bigmodel.cn)
- DeepSeek (api.deepseek.com)
- OpenAI, Moonshot, Qwen, etc.
"""
import os
import re
import time
import requests
from typing import Optional, List
def _resolve_env_vars(value: str) -> str:
    """Expand ``${VAR}`` and ``$VAR`` references from the process environment.

    Non-string inputs are handed back untouched. A reference whose variable
    is not present in ``os.environ`` is left in the text exactly as written.
    """
    if isinstance(value, str):
        def _lookup(match):
            # Only one alternative of the pattern matches at a time: group 1
            # holds the name for the braced form, group 2 for the bare form.
            braced, bare = match.groups()
            name = braced or bare
            return os.environ.get(name, match.group(0))

        return re.sub(r'\$\{(\w+)\}|\$(\w+)', _lookup, value)
    return value
def _detect_provider(api_url: str) -> str:
    """Detect the provider from an endpoint URL.

    The URL is matched case-insensitively (host names are not case
    sensitive), so ``https://API.DeepSeek.com/...`` is classified the same
    way as its lowercase spelling.

    Args:
        api_url: Full chat-completions endpoint URL.

    Returns:
        ``"deepseek"`` if the URL contains "deepseek", ``"glm"`` if it
        contains "bigmodel", otherwise ``"openai"`` as the generic fallback.
    """
    url = api_url.lower()
    if "deepseek" in url:
        return "deepseek"
    if "bigmodel" in url:
        return "glm"
    return "openai"
class LLMClient:
    """OpenAI-compatible LLM API client.

    Each model must have its own api_url and api_key entry in the
    ``model_config`` mapping given to the constructor.
    """

    # Upper bounds applied to ``max_tokens`` per detected provider; providers
    # not listed here receive the caller's value unchanged.
    _MAX_TOKENS_CAPS = {"deepseek": 8192, "glm": 65536}

    def __init__(self, model_config: dict):
        """Initialize with per-model config lookup.

        Args:
            model_config: {model_id: {"api_url": ..., "api_key": ...}}
        """
        self.model_config = model_config

    def _get_credentials(self, model: str):
        """Get api_url and api_key for a model, with env-var expansion.

        Args:
            model: Key to look up in ``self.model_config``.

        Returns:
            Tuple ``(api_url, api_key)`` after ``_resolve_env_vars`` has been
            applied to both values.

        Raises:
            ValueError: If the model has no (or an empty) config entry, or if
                its api_url or api_key is missing/empty.
        """
        cfg = self.model_config.get(model)
        if not cfg:
            raise ValueError(f"Unknown model: '{model}', not found in config")
        api_url = _resolve_env_vars(cfg.get("api_url", ""))
        api_key = _resolve_env_vars(cfg.get("api_key", ""))
        if not api_url:
            raise ValueError(f"Model '{model}' has no api_url configured")
        if not api_key:
            raise ValueError(f"Model '{model}' has no api_key configured")
        return api_url, api_key

    def _build_body(self, model, messages, max_tokens, temperature, thinking_enabled, tools, stream, api_url):
        """Build request body with provider-specific parameter adaptation.

        The provider is derived from ``api_url`` via ``_detect_provider``.

        Returns:
            Dict to be sent as the JSON request body.
        """
        provider = _detect_provider(api_url)
        body = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }

        # --- Provider-specific: max_tokens ---
        cap = self._MAX_TOKENS_CAPS.get(provider)
        body["max_tokens"] = max_tokens if cap is None else min(max_tokens, cap)

        # --- Provider-specific: thinking ---
        # Only GLM gets an explicit switch. Per the original author's note,
        # deepseek-reasoner has built-in reasoning and needs no extra param.
        if thinking_enabled and provider == "glm":
            body["thinking"] = {"type": "enabled"}

        # --- Tools ---
        if tools:
            body["tools"] = tools
            body["tool_choice"] = "auto"

        # --- Provider-specific: stream ---
        if stream:
            body["stream"] = True
            # Usage reporting in the stream is requested for GLM only; the
            # original author's note says DeepSeek does not support
            # stream_options.
            if provider == "glm":
                body["stream_options"] = {"include_usage": True}

        return body

    def call(
        self,
        model: str,
        messages: List[dict],
        max_tokens: int = 65536,
        temperature: float = 1.0,
        thinking_enabled: bool = False,
        tools: Optional[List[dict]] = None,
        stream: bool = False,
        timeout: int = 120,
        max_retries: int = 3,
    ):
        """Call LLM API with retry on rate limit (429).

        Args:
            model: Model id; must be a key of ``self.model_config``.
            messages: Chat messages placed in the request body as-is.
            max_tokens: Requested completion limit (may be capped per provider).
            temperature: Sampling temperature placed in the request body.
            thinking_enabled: Ask for the provider's thinking mode where
                ``_build_body`` supports it.
            tools: Optional tool definitions; when given, ``tool_choice`` is
                set to ``"auto"``.
            stream: Request a streamed response (also passed to ``requests``).
            timeout: Timeout handed to ``requests.post``.
            max_retries: Number of extra attempts after a 429 response.
                Negative values are treated as 0.

        Returns:
            The ``requests`` response of the last attempt. Non-429 statuses
            are returned as-is without raising; a 429 is returned only once
            the retries are exhausted.

        Raises:
            ValueError: If the model is not configured (see
                ``_get_credentials``).
        """
        api_url, api_key = self._get_credentials(model)
        body = self._build_body(
            model, messages, max_tokens, temperature,
            thinking_enabled, tools, stream, api_url,
        )
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        # Clamp so that a negative max_retries still performs one request
        # instead of skipping the loop and leaving the response unbound.
        retries_allowed = max(0, max_retries)
        attempt = 0
        while True:
            resp = requests.post(
                api_url,
                headers=headers,
                json=body,
                stream=stream,
                timeout=timeout,
            )
            if resp.status_code != 429 or attempt >= retries_allowed:
                return resp

            # The rate-limited response is discarded: close it so that its
            # connection is released before sleeping (with stream=True the
            # connection stays checked out until the body is read or closed).
            resp.close()
            time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...
            attempt += 1