This commit is contained in:
ViperEkura 2026-04-12 22:29:35 +08:00
parent e93ec6d94d
commit d2c02a3645
2 changed files with 34 additions and 15 deletions

View File

@ -106,8 +106,7 @@ class ChatService:
while iteration < MAX_ITERATIONS:
iteration += 1
print(f"[CHAT DEBUG] ====== Starting iteration {iteration} ======")
print(f"[CHAT DEBUG] Messages count: {len(messages)}")
print(f"[CHAT] Starting iteration {iteration}, messages: {len(messages)}")
tool_calls_this_round = None
@ -123,7 +122,6 @@ class ChatService:
if event_type == "content_delta":
content = event.get("content", "")
if content:
print(f"[CHAT DEBUG] Iteration {iteration} content: {content[:100]}...")
yield {"type": "text", "content": content}
elif event_type == "tool_call_delta":
@ -132,10 +130,10 @@ class ChatService:
elif event_type == "done":
tool_calls_this_round = event.get("tool_calls")
print(f"[CHAT DEBUG] Done event, tool_calls: {tool_calls_this_round}")
print(f"[CHAT] Done event, tool_calls: {tool_calls_this_round}")
if tool_calls_this_round and tools_enabled:
print(f"[CHAT DEBUG] Executing tools: {tool_calls_this_round}")
print(f"[CHAT] Executing tools")
yield {"type": "tool_call", "data": tool_calls_this_round}
tool_results = self.tool_executor.process_tool_calls_parallel(
@ -158,18 +156,17 @@ class ChatService:
yield {"type": "tool_result", "data": tool_results}
else:
print(f"[CHAT] Breaking: tool_calls={tool_calls_this_round}, tools_enabled={tools_enabled}")
break
if not tool_calls_this_round or not tools_enabled:
print(f"[CHAT] Breaking at outer")
break
yield {"type": "done"}
except Exception as e:
print(f"[CHAT ERROR] Exception in stream_response: {type(e).__name__}: {str(e)}")
yield {"type": "error", "error": str(e)}
except Exception as e:
print(f"[CHAT] Exception: {type(e).__name__}: {str(e)}")
yield {"type": "error", "error": str(e)}
def non_stream_response(

View File

@ -143,16 +143,22 @@ class LLMClient:
# Accumulators for tool calls (need to collect from delta chunks)
accumulated_tool_calls = {}
print(f"[LLM] Starting stream_call for model: {model}")
print(f"[LLM] Messages count: {len(messages)}")
try:
async with httpx.AsyncClient(timeout=120.0) as client:
print(f"[LLM] Sending request to {self.api_url}")
async with client.stream(
"POST",
self.api_url,
headers=self._build_headers(),
json=body
) as response:
print(f"[LLM] Response status: {response.status_code}")
response.raise_for_status()
chunk_count = 0
async for line in response.aiter_lines():
if not line.strip():
continue
@ -161,25 +167,36 @@ class LLMClient:
data_str = line[6:]
if data_str == "[DONE]":
yield {"type": "done"}
print(f"[LLM] Received [DONE], chunk_count: {chunk_count}")
# Don't yield a done event for [DONE]; the chunk carrying finish_reason triggers it
continue
try:
chunk = json.loads(data_str)
chunk_count += 1
except json.JSONDecodeError:
print(f"[LLM] JSON decode error for: {data_str[:100]}")
continue
if "choices" not in chunk:
print(f"[LLM] No 'choices' in chunk")
continue
delta = chunk.get("choices", [{}])[0].get("delta", {})
# DeepSeek reasoner: use content if available, otherwise fall back to reasoning_content
# DeepSeek reasoner: prefer 'content' over 'reasoning_content'
content = delta.get("content")
reasoning = delta.get("reasoning_content", "")
# Print first few chunks for debugging
if chunk_count <= 3:
print(f"[LLM] delta: content={repr(content)[:30]}, reasoning={repr(reasoning)[:30]}")
if content and isinstance(content, str) and content.strip():
print(f"[LLM] Yielding content: {content[:50]}...")
yield {"type": "content_delta", "content": content}
elif reasoning:
print(f"[LLM] Yielding reasoning: {reasoning[:50]}...")
yield {"type": "content_delta", "content": reasoning}
# Accumulate tool calls from delta chunks (DeepSeek sends them incrementally)
@ -197,20 +214,25 @@ class LLMClient:
accumulated_tool_calls[idx]["function"]["arguments"] += tc["function"]["arguments"]
if tool_calls_delta:
print(f"[LLM] Found tool_calls in delta: {tool_calls_delta}")
yield {"type": "tool_call_delta", "tool_call": tool_calls_delta}
# Check for finish_reason to signal end of stream
choice = chunk.get("choices", [{}])[0]
finish_reason = choice.get("finish_reason")
if finish_reason:
# Build final tool_calls list from accumulated chunks
print(f"[LLM] finish_reason: {finish_reason}")
final_tool_calls = list(accumulated_tool_calls.values()) if accumulated_tool_calls else None
yield {"type": "done", "tool_calls": final_tool_calls}
except httpx.HTTPStatusError as e:
# Return error as an event instead of raising
error_text = e.response.text if e.response else str(e)
yield {"type": "error", "error": f"HTTP {e.response.status_code}: {error_text}"}
status_code = e.response.status_code if e.response else "?"
print(f"[LLM] HTTP error: {status_code}")
yield {"type": "error", "error": f"HTTP {status_code}: Request failed"}
except httpx.ResponseNotRead:
print(f"[LLM] ResponseNotRead error")
yield {"type": "error", "error": "Streaming response error"}
except Exception as e:
print(f"[LLM] Exception: {type(e).__name__}: {str(e)}")
yield {"type": "error", "error": str(e)}