# Source: Luxx/luxx/services/chat.py
"""Chat service module"""
import json
from typing import List, Dict, Any, AsyncGenerator
from luxx.models import Conversation, Message
from luxx.tools.executor import ToolExecutor
from luxx.tools.core import registry
from luxx.services.llm_client import llm_client
# Upper bound on model/tool-call rounds per request; prevents the tool loop
# from running forever if the model keeps requesting tools.
MAX_ITERATIONS = 10
class ChatService:
    """Orchestrates chat turns against the LLM, including tool-call rounds.

    Builds the chat-completion message history for a conversation, streams
    or synchronously fetches model output, and loops through
    tool-call/tool-result rounds until the model produces a final answer
    (bounded by MAX_ITERATIONS).
    """

    def __init__(self):
        # Executor that runs the tool calls requested by the model.
        self.tool_executor = ToolExecutor()

    def build_messages(
        self,
        conversation: Conversation,
        include_system: bool = True
    ) -> List[Dict[str, str]]:
        """Build the chat-completion message list for *conversation*.

        Args:
            conversation: Conversation whose system prompt and stored
                messages (ordered by creation time) are serialized.
            include_system: Prepend the system prompt when one exists.

        Returns:
            List of ``{"role", "content"}`` dicts in chronological order.
        """
        messages: List[Dict[str, str]] = []
        if include_system and conversation.system_prompt:
            messages.append({
                "role": "system",
                "content": conversation.system_prompt
            })
        for msg in conversation.messages.order_by(Message.created_at).all():
            messages.append({
                "role": msg.role,
                "content": msg.content
            })
        return messages

    async def stream_response(
        self,
        conversation: Conversation,
        user_message: str,
        tools_enabled: bool = True
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """
        Streaming response generator.

        Event types:
        - text: incremental assistant text
        - tool_call: a tool call (delta or full list) requested by the model
        - tool_result: results of executing a round of tool calls
        - done: final response complete
        - error: on error (terminal; the exception is not re-raised)
        """
        try:
            messages = self.build_messages(conversation)
            messages.append({
                "role": "user",
                "content": user_message
            })
            tools = registry.list_all() if tools_enabled else None
            tool_calls_this_round = None
            # Each pass is one model call; tool rounds are bounded to avoid
            # an infinite call-tool-call loop.
            for _ in range(MAX_ITERATIONS):
                tool_calls_this_round = None
                # Accumulate streamed text so the assistant turn recorded
                # below keeps it (previously hard-coded to "" and lost).
                assistant_chunks: List[str] = []
                async for event in llm_client.stream_call(
                    model=conversation.model,
                    messages=messages,
                    tools=tools,
                    temperature=conversation.temperature,
                    max_tokens=conversation.max_tokens
                ):
                    event_type = event.get("type")
                    if event_type == "content_delta":
                        content = event.get("content", "")
                        if content:
                            assistant_chunks.append(content)
                            yield {"type": "text", "content": content}
                    elif event_type == "tool_call_delta":
                        tool_call = event.get("tool_call", {})
                        yield {"type": "tool_call", "data": tool_call}
                    elif event_type == "done":
                        tool_calls_this_round = event.get("tool_calls")
                        if tool_calls_this_round and tools_enabled:
                            yield {"type": "tool_call", "data": tool_calls_this_round}
                            # NOTE(review): this call passes a second positional
                            # argument ({}) while non_stream_response passes only
                            # one — confirm ToolExecutor's signature and unify.
                            # Also appears to be a sync (blocking) call inside an
                            # async generator — verify it does not stall the loop.
                            tool_results = self.tool_executor.process_tool_calls_parallel(
                                tool_calls_this_round,
                                {}
                            )
                            # Record the assistant turn, including any text that
                            # was streamed alongside the tool calls.
                            messages.append({
                                "role": "assistant",
                                "content": "".join(assistant_chunks),
                                "tool_calls": tool_calls_this_round
                            })
                            for tr in tool_results:
                                messages.append({
                                    "role": "tool",
                                    "tool_call_id": tr.get("tool_call_id"),
                                    "content": str(tr.get("result", ""))
                                })
                            yield {"type": "tool_result", "data": tool_results}
                        else:
                            break
                if not tool_calls_this_round or not tools_enabled:
                    break
            yield {"type": "done"}
        except Exception as e:
            # Surface failures as a terminal stream event rather than raising
            # through the async generator.
            yield {"type": "error", "error": str(e)}

    def non_stream_response(
        self,
        conversation: Conversation,
        user_message: str,
        tools_enabled: bool = False
    ) -> Dict[str, Any]:
        """Non-streaming response.

        Runs up to MAX_ITERATIONS model/tool rounds.

        Returns:
            ``{"success": True, "content": ...}`` on completion, or
            ``{"success": False, "error": ...}`` if any step raises.
        """
        try:
            messages = self.build_messages(conversation)
            messages.append({
                "role": "user",
                "content": user_message
            })
            tools = registry.list_all() if tools_enabled else None
            for _ in range(MAX_ITERATIONS):
                response = llm_client.sync_call(
                    model=conversation.model,
                    messages=messages,
                    tools=tools,
                    temperature=conversation.temperature,
                    max_tokens=conversation.max_tokens
                )
                tool_calls = response.tool_calls
                if tool_calls and tools_enabled:
                    messages.append({
                        "role": "assistant",
                        "content": response.content,
                        "tool_calls": tool_calls
                    })
                    # NOTE(review): called with one argument here but two in
                    # stream_response — confirm the intended signature.
                    tool_results = self.tool_executor.process_tool_calls_parallel(tool_calls)
                    for tr in tool_results:
                        messages.append({
                            "role": "tool",
                            "tool_call_id": tr.get("tool_call_id"),
                            "content": str(tr.get("result", ""))
                        })
                else:
                    # No tool calls (or tools disabled): this is the final answer.
                    return {
                        "success": True,
                        "content": response.content
                    }
            # Tool loop never converged within the round budget.
            return {
                "success": True,
                "content": "Max iterations reached"
            }
        except Exception as e:
            return {
                "success": False,
                "error": str(e)
            }
# Module-level singleton: the rest of the application imports and shares
# this one ChatService instance.
chat_service = ChatService()