feat: 增加agent 的后端
This commit is contained in:
parent
22a4b8a4bb
commit
8089d94e78
|
|
@ -0,0 +1,16 @@
|
|||
"""Multi-Agent system module"""
|
||||
from luxx.agents.core import Agent, AgentConfig, AgentType, AgentStatus
|
||||
from luxx.agents.dag import DAG, TaskNode, TaskNodeStatus, TaskResult
|
||||
from luxx.agents.registry import AgentRegistry
|
||||
|
||||
# Public API of luxx.agents, re-exported from the core/dag/registry submodules.
__all__ = [
    "Agent",
    "AgentConfig",
    "AgentType",
    "AgentStatus",
    "DAG",
    "TaskNode",
    "TaskNodeStatus",
    "TaskResult",
    "AgentRegistry",
]
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
"""Agent core models"""
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
|
||||
from luxx.tools.core import CommandPermission
|
||||
|
||||
|
||||
class AgentType(str, Enum):
    """Agent type enumeration.

    str-valued so members compare equal to, and serialize as, plain strings.
    """
    SUPERVISOR = "supervisor"  # coordinating agent that plans/dispatches work
    WORKER = "worker"          # agent that executes individual tasks
|
||||
|
||||
|
||||
class AgentStatus(str, Enum):
    """Agent status enumeration.

    str-valued lifecycle states; IDLE is the default for a new Agent.
    """
    IDLE = "idle"            # created, no work assigned
    PLANNING = "planning"
    EXECUTING = "executing"
    WAITING = "waiting"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
|
||||
|
||||
|
||||
@dataclass
class AgentConfig:
    """Agent configuration.

    Static settings for an agent; runtime state lives on Agent itself.
    """
    name: str
    agent_type: AgentType
    description: str = ""
    # Permission ceiling for this agent; the effective permission becomes
    # min(user permission, max_permission) — see Agent.set_user_permission.
    max_permission: CommandPermission = CommandPermission.EXECUTE
    max_turns: int = 10  # Context window: sliding window size (user/assistant turns)
    model: str = "deepseek-chat"  # LLM model identifier
    temperature: float = 0.7
    max_tokens: int = 4096
    system_prompt: str = ""  # seeded into the context window when non-empty
    tools: List[str] = field(default_factory=list)  # Tool names available to this agent
    metadata: Dict[str, Any] = field(default_factory=dict)  # free-form extra settings
|
||||
|
||||
|
||||
@dataclass
class Agent:
    """
    Agent entity.

    Represents an AI agent with its configuration, state, and context.
    The context window is a list of {"role": ..., "content": ...} messages
    managed as a sliding window bounded by ``config.max_turns``.
    """
    id: str
    config: AgentConfig
    status: AgentStatus = AgentStatus.IDLE
    user_id: Optional[int] = None
    conversation_id: Optional[str] = None
    workspace: Optional[str] = None

    # Runtime state
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 and
    # returns a naive datetime; kept as-is so serialized timestamps stay
    # byte-compatible with existing consumers — confirm before migrating
    # to datetime.now(timezone.utc).
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    # Context management
    context_window: List[Dict[str, Any]] = field(default_factory=list)
    accumulated_result: Dict[str, Any] = field(default_factory=dict)

    # Progress tracking
    current_task_id: Optional[str] = None
    progress: float = 0.0  # 0.0 - 1.0

    # Permission (effective permission = min(user_permission, agent.max_permission))
    effective_permission: CommandPermission = field(default_factory=lambda: CommandPermission.EXECUTE)

    def __post_init__(self):
        """Seed the context window with the configured system prompt, if any."""
        if self.config.system_prompt and not self.context_window:
            self.context_window = [
                {"role": "system", "content": self.config.system_prompt}
            ]

    def add_message(self, role: str, content: str) -> None:
        """
        Add message to context window with sliding window management.

        Args:
            role: Message role (user/assistant/system)
            content: Message content
        """
        self.context_window.append({"role": role, "content": content})
        self._trim_context()
        self.updated_at = datetime.utcnow()

    def _trim_context(self) -> None:
        """
        Trim context window using a sliding window strategy.

        Keeps the system prompt (when one is actually present at index 0)
        plus the most recent ``max_turns`` user/assistant turns.

        Fixes over the previous version:
        - Index 0 is pinned only when it really holds a system prompt;
          previously an ordinary first message was frozen in place forever
          whenever no system prompt was configured.
        - ``max_turns == 0`` no longer keeps everything: ``lst[-0:]`` is the
          whole list, not an empty slice, so the keep-count is guarded.
        """
        max_items = 1 + (self.config.max_turns * 2)  # system + (max_turns * 2)
        if len(self.context_window) <= max_items:
            return
        if self.context_window[0].get("role") == "system":
            keep = max_items - 1  # slots left after the pinned system prompt
            recent = self.context_window[1:][-keep:] if keep > 0 else []
            self.context_window = [self.context_window[0]] + recent
        else:
            # No system prompt: plain sliding window over all messages.
            self.context_window = self.context_window[-max_items:]

    def get_context(self) -> List[Dict[str, Any]]:
        """Get a shallow copy of the current context window."""
        return self.context_window.copy()

    def set_user_permission(self, user_permission: CommandPermission) -> None:
        """
        Set effective permission based on user and agent limits.

        Effective permission = min(user_permission, agent.max_permission)

        Args:
            user_permission: User's permission level
        """
        # NOTE(review): relies on CommandPermission supporting ordered
        # comparison (min()) — confirm it is an ordered/IntEnum-style enum.
        self.effective_permission = min(user_permission, self.config.max_permission)

    def store_result(self, key: str, value: Any) -> None:
        """
        Store result for supervisor's result-based context management.

        Args:
            key: Result key
            value: Result value
        """
        self.accumulated_result[key] = value
        self.updated_at = datetime.utcnow()

    def get_result(self, key: str) -> Optional[Any]:
        """Get stored result by key (None when absent)."""
        return self.accumulated_result.get(key)

    def clear_context(self) -> None:
        """Clear context but keep the system prompt when present."""
        if self.context_window and self.context_window[0]["role"] == "system":
            system_prompt = self.context_window[0]
            self.context_window = [system_prompt]
        else:
            self.context_window = []

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization (context is not included)."""
        return {
            "id": self.id,
            "name": self.config.name,
            "type": self.config.agent_type.value,
            "status": self.status.value,
            "user_id": self.user_id,
            "conversation_id": self.conversation_id,
            "workspace": self.workspace,
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "current_task_id": self.current_task_id,
            "progress": self.progress,
            "effective_permission": self.effective_permission.name,
        }
|
||||
|
|
@ -0,0 +1,418 @@
|
|||
"""DAG (Directed Acyclic Graph) and TaskNode models"""
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
|
||||
class TaskNodeStatus(str, Enum):
    """Task node status enumeration.

    str-valued so members serialize as plain strings. Transitions are driven
    externally via TaskNode.mark_* and the DAG/scheduler.
    """
    PENDING = "pending"      # Not yet started
    READY = "ready"          # Dependencies satisfied, can start
    RUNNING = "running"      # Currently executing
    COMPLETED = "completed"  # Successfully completed
    FAILED = "failed"        # Execution failed
    CANCELLED = "cancelled"  # Cancelled by user
    BLOCKED = "blocked"      # Blocked by failed dependency
|
||||
|
||||
|
||||
@dataclass
class TaskResult:
    """Outcome of a single task execution."""
    success: bool
    data: Any = None
    error: Optional[str] = None
    output_data: Optional[Dict[str, Any]] = None  # Structured output for supervisor
    execution_time: float = 0.0  # seconds

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this result into a plain dictionary."""
        return {
            field_name: getattr(self, field_name)
            for field_name in (
                "success",
                "data",
                "error",
                "output_data",
                "execution_time",
            )
        }

    @classmethod
    def ok(cls, data: Any = None, output_data: Optional[Dict[str, Any]] = None,
           execution_time: float = 0.0) -> "TaskResult":
        """Build a successful result."""
        return cls(
            success=True,
            data=data,
            output_data=output_data,
            execution_time=execution_time,
        )

    @classmethod
    def fail(cls, error: str, data: Any = None) -> "TaskResult":
        """Build a failed result carrying *error*."""
        return cls(success=False, error=error, data=data)
|
||||
|
||||
|
||||
@dataclass
class TaskNode:
    """
    Task node in the DAG.

    Represents a single executable task within the agent workflow.
    State transitions are driven externally (by the DAG/scheduler) through
    the mark_* methods: PENDING -> READY -> RUNNING -> COMPLETED/FAILED,
    with CANCELLED possible at any point.
    """
    id: str  # unique node id; auto-filled in __post_init__ when empty
    name: str
    description: str = ""

    # Task definition
    task_type: str = "generic"  # e.g., "code", "shell", "file", "llm"
    task_data: Dict[str, Any] = field(default_factory=dict)  # Task-specific parameters

    # Dependencies (node IDs that must complete before this node)
    dependencies: List[str] = field(default_factory=list)

    # Status tracking
    status: TaskNodeStatus = TaskNodeStatus.PENDING
    result: Optional[TaskResult] = None

    # Progress tracking
    progress: float = 0.0  # 0.0 - 1.0
    progress_message: str = ""

    # Execution info
    assigned_agent_id: Optional[str] = None
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    # Output data (key-value pairs for dependent tasks)
    output_data: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Assign a short random id when none was provided."""
        if not self.id:
            # NOTE(review): 8 hex chars of a UUID — collision probability grows
            # with node count; fine for small DAGs, confirm for large ones.
            self.id = str(uuid.uuid4())[:8]

    @property
    def is_root(self) -> bool:
        """Check if this is a root node (no dependencies)."""
        return len(self.dependencies) == 0

    @property
    def is_leaf(self) -> bool:
        """Check if this is a leaf node (no dependents).

        NOTE(review): always returns False — the node holds no back-references
        to its dependents; DAG._leaf_nodes is the authoritative source.
        """
        return False  # Will be set by DAG

    @property
    def execution_time(self) -> float:
        """Calculate execution time in seconds (0.0 until both timestamps set)."""
        if self.started_at and self.completed_at:
            return (self.completed_at - self.started_at).total_seconds()
        return 0.0

    def mark_ready(self) -> None:
        """Mark node as ready to execute."""
        self.status = TaskNodeStatus.READY

    def mark_running(self, agent_id: str) -> None:
        """Mark node as running and record the executing agent and start time."""
        self.status = TaskNodeStatus.RUNNING
        self.assigned_agent_id = agent_id
        # NOTE(review): datetime.utcnow() is deprecated since 3.12 (naive
        # timestamps); kept consistent with the rest of the module.
        self.started_at = datetime.utcnow()

    def mark_completed(self, result: TaskResult) -> None:
        """Mark node as completed, storing result and promoting its output_data."""
        self.status = TaskNodeStatus.COMPLETED
        self.result = result
        self.completed_at = datetime.utcnow()
        self.progress = 1.0
        if result.output_data:
            # Expose structured output to dependent tasks.
            self.output_data = result.output_data

    def mark_failed(self, error: str) -> None:
        """Mark node as failed, wrapping *error* into a failure TaskResult."""
        self.status = TaskNodeStatus.FAILED
        self.result = TaskResult.fail(error)
        self.completed_at = datetime.utcnow()

    def mark_cancelled(self) -> None:
        """Mark node as cancelled (no result is recorded)."""
        self.status = TaskNodeStatus.CANCELLED
        self.completed_at = datetime.utcnow()

    def update_progress(self, progress: float, message: str = "") -> None:
        """Update execution progress, clamping to [0.0, 1.0].

        An empty *message* leaves the previous progress_message untouched.
        """
        self.progress = max(0.0, min(1.0, progress))
        if message:
            self.progress_message = message

    def can_execute(self, completed_nodes: Set[str]) -> bool:
        """
        Check if this node can execute (READY and all dependencies completed).

        Args:
            completed_nodes: Set of completed node IDs
        """
        # Nodes that are PENDING/RUNNING/finished are never eligible here.
        if self.status != TaskNodeStatus.READY:
            return False
        return all(dep_id in completed_nodes for dep_id in self.dependencies)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "task_type": self.task_type,
            "task_data": self.task_data,
            "dependencies": self.dependencies,
            "status": self.status.value,
            "progress": self.progress,
            "progress_message": self.progress_message,
            "assigned_agent_id": self.assigned_agent_id,
            "result": self.result.to_dict() if self.result else None,
            "output_data": self.output_data,
            "started_at": self.started_at.isoformat() if self.started_at else None,
            "completed_at": self.completed_at.isoformat() if self.completed_at else None,
        }
|
||||
|
||||
|
||||
@dataclass
class DAG:
    """
    Directed Acyclic Graph for task scheduling.

    Manages the task workflow with dependency tracking and parallel execution
    support. Nodes are TaskNode instances keyed by id; edges are
    (from_id, to_id) pairs where the target depends on the source.
    """
    id: str
    name: str = ""
    description: str = ""

    # Nodes and edges
    nodes: Dict[str, TaskNode] = field(default_factory=dict)
    edges: List[tuple] = field(default_factory=list)  # (from_id, to_id)

    # Metadata
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)

    # Root and leaf caches, rebuilt on every structural mutation
    _root_nodes: Set[str] = field(default_factory=set)
    _leaf_nodes: Set[str] = field(default_factory=set)

    def __post_init__(self):
        """Assign a short random id when none was provided."""
        if not self.id:
            self.id = str(uuid.uuid4())[:8]

    def add_node(self, node: TaskNode) -> None:
        """
        Add a task node to the DAG.

        Args:
            node: TaskNode to add
        """
        self.nodes[node.id] = node
        self._update_root_leaf_cache()
        self.updated_at = datetime.utcnow()

    def add_edge(self, from_id: str, to_id: str) -> None:
        """
        Add an edge (dependency) between nodes.

        Args:
            from_id: Source node ID (must complete first)
            to_id: Target node ID (depends on source)

        Raises:
            ValueError: if either endpoint is not a registered node
        """
        if from_id not in self.nodes:
            raise ValueError(f"Source node '{from_id}' not found")
        if to_id not in self.nodes:
            raise ValueError(f"Target node '{to_id}' not found")

        # Add dependency to target node (idempotent)
        if from_id not in self.nodes[to_id].dependencies:
            self.nodes[to_id].dependencies.append(from_id)

        self.edges.append((from_id, to_id))
        self._update_root_leaf_cache()
        self.updated_at = datetime.utcnow()

    def _update_root_leaf_cache(self) -> None:
        """Rebuild the cached root and leaf node sets in a single pass.

        The previous version scanned all nodes for every node (O(n^2) per
        mutation); one pass over each node's dependency list is equivalent:
        roots have no dependencies, and any node that appears in someone's
        dependency list is not a leaf.
        """
        self._root_nodes = set()
        self._leaf_nodes = set(self.nodes)

        for node_id, node in self.nodes.items():
            if not node.dependencies:
                self._root_nodes.add(node_id)
            self._leaf_nodes.difference_update(node.dependencies)

    @property
    def root_nodes(self) -> List[TaskNode]:
        """Get all root nodes (nodes with no dependencies)."""
        return [self.nodes[nid] for nid in self._root_nodes if nid in self.nodes]

    @property
    def leaf_nodes(self) -> List[TaskNode]:
        """Get all leaf nodes (nodes no one depends on)."""
        return [self.nodes[nid] for nid in self._leaf_nodes if nid in self.nodes]

    def get_ready_nodes(self, completed_nodes: Set[str]) -> List[TaskNode]:
        """
        Get all READY nodes whose dependencies are all completed.

        Args:
            completed_nodes: Set of completed node IDs
        """
        return [
            node for node in self.nodes.values()
            if node.status == TaskNodeStatus.READY and node.can_execute(completed_nodes)
        ]

    def get_blocked_nodes(self, failed_node_id: str) -> List[TaskNode]:
        """
        Get all unfinished nodes that directly depend on a failed node.

        Args:
            failed_node_id: ID of the failed node
        """
        blocked = []
        for node in self.nodes.values():
            if failed_node_id in node.dependencies:
                if node.status not in (TaskNodeStatus.COMPLETED, TaskNodeStatus.FAILED, TaskNodeStatus.CANCELLED):
                    blocked.append(node)
        return blocked

    def mark_node_completed(self, node_id: str, result: TaskResult) -> None:
        """Mark a node as completed with result (no-op for unknown ids)."""
        if node_id in self.nodes:
            self.nodes[node_id].mark_completed(result)
            self.updated_at = datetime.utcnow()

    def mark_node_failed(self, node_id: str, error: str) -> None:
        """Mark a node as failed and block its direct dependents."""
        if node_id in self.nodes:
            self.nodes[node_id].mark_failed(error)
            self._propagate_failure(node_id)
            self.updated_at = datetime.utcnow()

    def _propagate_failure(self, failed_node_id: str) -> None:
        """Block direct dependents of a failed node.

        Fix: PENDING dependents are blocked as well. Previously only READY
        dependents were, so a pending dependent of a failed node would wait
        forever for a dependency that can never complete.
        """
        for node in self.nodes.values():
            if failed_node_id in node.dependencies:
                if node.status in (TaskNodeStatus.PENDING, TaskNodeStatus.READY):
                    node.status = TaskNodeStatus.BLOCKED

    @property
    def completed_count(self) -> int:
        """Count of completed nodes."""
        return sum(1 for n in self.nodes.values() if n.status == TaskNodeStatus.COMPLETED)

    @property
    def failed_count(self) -> int:
        """Count of failed nodes."""
        return sum(1 for n in self.nodes.values() if n.status == TaskNodeStatus.FAILED)

    @property
    def total_count(self) -> int:
        """Total number of nodes."""
        return len(self.nodes)

    @property
    def progress(self) -> float:
        """Overall DAG progress (0.0 - 1.0), the mean of per-node progress."""
        if not self.nodes:
            return 0.0
        return sum(n.progress for n in self.nodes.values()) / len(self.nodes)

    @property
    def is_complete(self) -> bool:
        """Check if DAG execution is complete (every node reached a terminal state)."""
        return all(
            n.status in (TaskNodeStatus.COMPLETED, TaskNodeStatus.FAILED, TaskNodeStatus.CANCELLED)
            for n in self.nodes.values()
        )

    @property
    def is_success(self) -> bool:
        """Check if DAG completed with no failures."""
        return self.is_complete and self.failed_count == 0

    def get_execution_order(self) -> List[List[TaskNode]]:
        """
        Get nodes grouped by execution level (parallel-friendly order).

        Fix: readiness is derived from the dependency edges alone. The
        previous version delegated to TaskNode.can_execute, which requires
        READY status, so a freshly built DAG (all nodes PENDING) always
        produced an empty order.

        Returns:
            List of node groups, where each group can be executed in parallel
        """
        levels: List[List[TaskNode]] = []
        completed: Set[str] = set()

        while len(completed) < len(self.nodes):
            current_level = [
                node for node in self.nodes.values()
                if node.id not in completed
                and all(dep_id in completed for dep_id in node.dependencies)
            ]

            if not current_level:
                break  # Circular dependency - remaining nodes can never run

            levels.append(current_level)
            completed.update(n.id for n in current_level)

        return levels

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "nodes": [n.to_dict() for n in self.nodes.values()],
            "edges": [{"from": e[0], "to": e[1]} for e in self.edges],
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "progress": self.progress,
            "completed_count": self.completed_count,
            "total_count": self.total_count,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DAG":
        """
        Create DAG from dictionary.

        Only structure (nodes, dependencies, edges) is restored; runtime
        status, results, and timestamps are not round-tripped.

        Args:
            data: Dictionary with DAG data
        """
        dag = cls(
            id=data.get("id", str(uuid.uuid4())[:8]),
            name=data.get("name", ""),
            description=data.get("description", ""),
        )

        # Recreate nodes
        for node_data in data.get("nodes", []):
            node = TaskNode(
                id=node_data["id"],
                name=node_data["name"],
                description=node_data.get("description", ""),
                task_type=node_data.get("task_type", "generic"),
                task_data=node_data.get("task_data", {}),
                dependencies=node_data.get("dependencies", []),
            )
            dag.add_node(node)

        # Recreate edges (add_edge is idempotent w.r.t. dependencies)
        for edge in data.get("edges", []):
            dag.add_edge(edge["from"], edge["to"])

        return dag
|
||||
|
|
@ -0,0 +1,360 @@
|
|||
"""DAG Scheduler - orchestrates parallel task execution"""
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any, Callable, Dict, List, Optional, Set
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import threading
|
||||
|
||||
from luxx.agents.core import Agent, AgentConfig, AgentType, AgentStatus
|
||||
from luxx.agents.dag import DAG, TaskNode, TaskNodeStatus, TaskResult
|
||||
from luxx.agents.supervisor import SupervisorAgent
|
||||
from luxx.agents.worker import WorkerAgent
|
||||
from luxx.tools.executor import ToolExecutor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DAGScheduler:
    """
    DAG Scheduler.

    Orchestrates parallel execution of tasks based on DAG structure.
    Features:
    - Parallel execution with max_workers limit
    - Dependency-aware scheduling
    - Real-time progress tracking
    - Cancellation support
    """

    def __init__(
        self,
        dag: DAG,
        supervisor: SupervisorAgent,
        worker_factory: Callable[[], WorkerAgent],
        max_workers: int = 3,
        tool_executor: ToolExecutor = None
    ):
        """
        Initialize DAG Scheduler.

        Args:
            dag: DAG to execute
            supervisor: Supervisor agent instance
            worker_factory: Factory function to create worker agents
            max_workers: Maximum parallel workers
            tool_executor: Tool executor instance (a fresh one is created
                when omitted)
        """
        self.dag = dag
        self.supervisor = supervisor
        self.worker_factory = worker_factory
        self.max_workers = max_workers
        self.tool_executor = tool_executor or ToolExecutor()

        # Execution state
        self._running_nodes: Set[str] = set()    # node ids currently executing
        self._completed_nodes: Set[str] = set()  # node ids that succeeded
        self._node_results: Dict[str, TaskResult] = {}
        self._parent_outputs: Dict[str, Dict[str, Any]] = {}

        # Control flags
        self._cancelled = False
        self._cancel_lock = threading.Lock()

        # Progress callback: called as (node_id, progress, message)
        self._progress_callback: Optional[Callable] = None

    def set_progress_callback(self, callback: Optional[Callable]) -> None:
        """Set progress callback for real-time updates (None disables)."""
        self._progress_callback = callback

    def _emit_progress(self, node_id: str, progress: float, message: str = "") -> None:
        """Emit progress update through the registered callback, if any."""
        if self._progress_callback:
            self._progress_callback(node_id, progress, message)

    async def execute(
        self,
        context: Dict[str, Any],
        task: str
    ) -> Dict[str, Any]:
        """
        Execute the DAG.

        Resets all per-run state, marks root nodes READY, then polls:
        launching eligible nodes until every node reaches a terminal
        (or BLOCKED) state, cancellation is requested, or no progress
        is possible.

        Args:
            context: Execution context (workspace, user info, etc.)
            task: Original task description
                  (NOTE(review): currently unused in this method body)

        Returns:
            Execution results
        """
        self._cancelled = False
        self._running_nodes.clear()
        self._completed_nodes.clear()
        self._node_results.clear()
        self._parent_outputs.clear()

        # Emit DAG start
        self._emit_progress("dag", 0.0, "Starting DAG execution")

        # Initialize nodes - mark root nodes as ready
        for node in self.dag.root_nodes:
            node.mark_ready()

        # Main execution loop (polling at 0.1s granularity)
        while not self._is_execution_complete():
            if self._is_cancelled():
                await self._cancel_running_nodes()
                return self._build_cancelled_result()

            # Get ready nodes that can be executed
            ready_nodes = self._get_ready_nodes()

            if not ready_nodes and not self._running_nodes:
                # No ready nodes and nothing running - deadlock or all blocked
                logger.warning("No ready nodes and no running nodes - possible deadlock")
                break

            # Launch ready nodes up to max_workers
            # NOTE(review): len(self._running_nodes) is evaluated while the
            # list is built, but _execute_node only registers a node once its
            # coroutine runs — a single pass can schedule more than
            # max_workers tasks. Confirm whether strict capping is required.
            nodes_to_launch = [
                n for n in ready_nodes
                if len(self._running_nodes) < self.max_workers
                and n.id not in self._running_nodes
            ]

            for node in nodes_to_launch:
                # NOTE(review): the Task returned by create_task is not
                # retained; asyncio holds only a weak reference to running
                # tasks, so keeping a reference set is the documented-safe
                # pattern — verify.
                asyncio.create_task(self._execute_node(node, context))

            # Small delay to prevent busy waiting
            await asyncio.sleep(0.1)

        # Emit DAG completion
        success = self.dag.is_success
        self._emit_progress("dag", 1.0, "DAG execution complete" if success else "DAG execution failed")

        return self._build_result()

    def _is_execution_complete(self) -> bool:
        """Check if execution is complete (BLOCKED counts as terminal here)."""
        return all(
            n.status in (TaskNodeStatus.COMPLETED, TaskNodeStatus.FAILED, TaskNodeStatus.CANCELLED, TaskNodeStatus.BLOCKED)
            for n in self.dag.nodes.values()
        )

    def _is_cancelled(self) -> bool:
        """Check if execution was cancelled (thread-safe read)."""
        with self._cancel_lock:
            return self._cancelled

    def cancel(self) -> None:
        """Request cancellation; the execute() loop observes it on next poll."""
        with self._cancel_lock:
            self._cancelled = True
        self._emit_progress("dag", 0.0, "Execution cancelled")

    def _get_ready_nodes(self) -> List[TaskNode]:
        """Get READY nodes that are not running and whose deps are satisfied."""
        return [
            n for n in self.dag.nodes.values()
            if n.status == TaskNodeStatus.READY
            and n.id not in self._running_nodes
            and n.can_execute(self._completed_nodes)
        ]

    async def _execute_node(self, node: TaskNode, context: Dict[str, Any]) -> None:
        """
        Execute a single node via a fresh worker agent.

        Collects the output_data of completed dependencies, runs the worker,
        then records the result and updates dependent-node states.

        Args:
            node: Node to execute
            context: Execution context
        """
        if self._is_cancelled():
            node.mark_cancelled()
            return

        # Mark node as running
        self._running_nodes.add(node.id)
        node.mark_running(self.supervisor.agent.id)

        self._emit_progress(node.id, 0.0, f"Starting: {node.name}")

        # Collect parent outputs for this node
        parent_outputs = {}
        for dep_id in node.dependencies:
            if dep_id in self._node_results:
                parent_outputs[dep_id] = self._node_results[dep_id].output_data or {}

        # Create worker for this task
        worker = self.worker_factory()

        # Define progress callback for this node
        def node_progress(progress: float, message: str = ""):
            node.update_progress(progress, message)
            self._emit_progress(node.id, progress, message)

        try:
            # Execute task
            result = await worker.execute_task(
                node,
                context,
                parent_outputs=parent_outputs,
                progress_callback=node_progress
            )

            # Store result
            self._node_results[node.id] = result

            if result.success:
                node.mark_completed(result)
                self._completed_nodes.add(node.id)
                self._emit_progress(node.id, 1.0, f"Completed: {node.name}")

                # Check if any blocked nodes can now run
                self._unblock_nodes()
            else:
                node.mark_failed(result.error or "Unknown error")
                self._emit_progress(node.id, 0.0, f"Failed: {node.name} - {result.error}")

                # Block dependent nodes
                self._block_dependent_nodes(node.id)

        except Exception as e:
            # Any unexpected worker error is treated as a task failure.
            logger.error(f"Node {node.id} execution error: {e}")
            node.mark_failed(str(e))
            self._node_results[node.id] = TaskResult.fail(error=str(e))
            self._block_dependent_nodes(node.id)

        finally:
            self._running_nodes.discard(node.id)

    def _unblock_nodes(self) -> None:
        """Unblock nodes whose dependencies are now satisfied.

        NOTE(review): can_execute returns False for BLOCKED nodes (it
        requires READY status), so this appears to never fire — confirm
        against TaskNode.can_execute.
        """
        for node in self.dag.nodes.values():
            if node.status == TaskNodeStatus.BLOCKED:
                if node.can_execute(self._completed_nodes):
                    node.mark_ready()
                    self._emit_progress(node.id, 0.0, f"Unblocked: {node.name}")

    def _block_dependent_nodes(self, failed_node_id: str) -> None:
        """Block direct dependents of a failed node and emit progress for each."""
        blocked = self.dag.get_blocked_nodes(failed_node_id)
        for node in blocked:
            node.status = TaskNodeStatus.BLOCKED
            self._emit_progress(node.id, 0.0, f"Blocked due to: {failed_node_id}")

    async def _cancel_running_nodes(self) -> None:
        """Mark all running nodes cancelled and clear the running set.

        NOTE(review): this only flips node state; the underlying asyncio
        tasks are not awaited or cancelled here.
        """
        for node_id in self._running_nodes:
            if node_id in self.dag.nodes:
                self.dag.nodes[node_id].mark_cancelled()
        self._running_nodes.clear()

    def _build_result(self) -> Dict[str, Any]:
        """Build the final result summary for a normally-ended run."""
        return {
            "success": self.dag.is_success,
            "dag_id": self.dag.id,
            "total_tasks": self.dag.total_count,
            "completed_tasks": self.dag.completed_count,
            "failed_tasks": self.dag.failed_count,
            "progress": self.dag.progress,
            "results": {
                node_id: result.to_dict()
                for node_id, result in self._node_results.items()
            }
        }

    def _build_cancelled_result(self) -> Dict[str, Any]:
        """Build the result summary for a cancelled run (adds "cancelled": True)."""
        return {
            "success": False,
            "cancelled": True,
            "dag_id": self.dag.id,
            "total_tasks": self.dag.total_count,
            "completed_tasks": self.dag.completed_count,
            "failed_tasks": self.dag.failed_count,
            "progress": self.dag.progress,
            "results": {
                node_id: result.to_dict()
                for node_id, result in self._node_results.items()
            }
        }
|
||||
|
||||
|
||||
class SchedulerPool:
    """
    Registry of live DAG schedulers.

    Caps how many DAG executions may exist at once and offers lookup,
    removal and cancellation by task id. Every access to the underlying
    mapping is guarded by a single lock.
    """

    def __init__(self, max_concurrent: int = 10):
        """
        Initialize scheduler pool.

        Args:
            max_concurrent: Maximum concurrent DAG executions
        """
        self.max_concurrent = max_concurrent
        self._schedulers: Dict[str, DAGScheduler] = {}
        self._lock = threading.Lock()

    def create_scheduler(
        self,
        task_id: str,
        dag: DAG,
        supervisor: SupervisorAgent,
        worker_factory: Callable,
        max_workers: int = 3
    ) -> DAGScheduler:
        """
        Build a scheduler for *dag* and register it under *task_id*.

        Args:
            task_id: Unique task identifier
            dag: DAG to execute
            supervisor: Supervisor agent
            worker_factory: Worker factory function
            max_workers: Max parallel workers

        Returns:
            The newly registered scheduler

        Raises:
            RuntimeError: when the pool is already at capacity
        """
        with self._lock:
            if len(self._schedulers) >= self.max_concurrent:
                raise RuntimeError("Maximum concurrent schedulers reached")

            new_scheduler = DAGScheduler(
                dag=dag,
                supervisor=supervisor,
                worker_factory=worker_factory,
                max_workers=max_workers
            )
            self._schedulers[task_id] = new_scheduler

        return new_scheduler

    def get(self, task_id: str) -> Optional[DAGScheduler]:
        """Look up the scheduler registered under *task_id*, if any."""
        with self._lock:
            return self._schedulers.get(task_id)

    def remove(self, task_id: str) -> bool:
        """Drop the scheduler for *task_id*; return whether one existed."""
        with self._lock:
            return self._schedulers.pop(task_id, None) is not None

    def cancel(self, task_id: str) -> bool:
        """Request cancellation of the scheduler for *task_id*, if registered."""
        target = self.get(task_id)
        if target is None:
            return False
        target.cancel()
        return True

    @property
    def active_count(self) -> int:
        """Number of active schedulers."""
        with self._lock:
            return len(self._schedulers)
|
||||
|
|
@ -0,0 +1,266 @@
|
|||
"""Agent Registry - manages agent lifecycle and access"""
|
||||
from typing import Dict, List, Optional, Set
|
||||
import threading
|
||||
import uuid
|
||||
|
||||
from luxx.agents.core import Agent, AgentConfig, AgentType, AgentStatus
|
||||
|
||||
|
||||
class AgentRegistry:
    """
    Agent Registry (Singleton)

    Thread-safe registry for managing all agents in the system.
    Provides agent creation, retrieval, and lifecycle management.
    Agents are additionally indexed by owning user and by conversation.
    """
    _instance: Optional["AgentRegistry"] = None
    _lock: threading.Lock = threading.Lock()

    def __new__(cls):
        # Double-checked locking so exactly one instance is ever created.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
                    cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # __init__ runs on every AgentRegistry() call; only initialize once.
        if self._initialized:
            return
        self._agents: Dict[str, Agent] = {}
        self._user_agents: Dict[int, Set[str]] = {}  # user_id -> set of agent_ids
        self._conversation_agents: Dict[str, Set[str]] = {}  # conversation_id -> set of agent_ids
        self._registry_lock = threading.Lock()
        self._initialized = True

    def _generate_agent_id(self) -> str:
        """Generate a unique agent ID (12 hex chars of a UUID4)."""
        return f"agent_{uuid.uuid4().hex[:12]}"

    def create_agent(
        self,
        config: AgentConfig,
        user_id: Optional[int] = None,
        conversation_id: Optional[str] = None,
        workspace: Optional[str] = None,
    ) -> Agent:
        """
        Create and register a new agent.

        Args:
            config: Agent configuration
            user_id: Associated user ID
            conversation_id: Associated conversation ID
            workspace: Agent's workspace path

        Returns:
            Created Agent instance
        """
        with self._registry_lock:
            agent_id = self._generate_agent_id()
            agent = Agent(
                id=agent_id,
                config=config,
                user_id=user_id,
                conversation_id=conversation_id,
                workspace=workspace,
            )

            self._agents[agent_id] = agent

            # Track by user. BUG FIX: compare against None instead of
            # truthiness so a legitimate user_id of 0 is still tracked.
            if user_id is not None:
                self._user_agents.setdefault(user_id, set()).add(agent_id)

            # Track by conversation (empty string is treated as "none").
            if conversation_id:
                self._conversation_agents.setdefault(conversation_id, set()).add(agent_id)

            return agent

    def get(self, agent_id: str) -> Optional[Agent]:
        """
        Get agent by ID.

        Args:
            agent_id: Agent ID

        Returns:
            Agent if found, None otherwise
        """
        with self._registry_lock:
            return self._agents.get(agent_id)

    def list_user_agents(self, user_id: int) -> List[Agent]:
        """
        List all agents for a user.

        Args:
            user_id: User ID

        Returns:
            List of the user's agents
        """
        with self._registry_lock:
            agent_ids = self._user_agents.get(user_id, set())
            return [self._agents[aid] for aid in agent_ids if aid in self._agents]

    def list_conversation_agents(self, conversation_id: str) -> List[Agent]:
        """
        List all agents in a conversation.

        Args:
            conversation_id: Conversation ID

        Returns:
            List of the conversation's agents
        """
        with self._registry_lock:
            agent_ids = self._conversation_agents.get(conversation_id, set())
            return [self._agents[aid] for aid in agent_ids if aid in self._agents]

    def list_by_type(self, agent_type: AgentType) -> List[Agent]:
        """
        List all agents of a specific type.

        Args:
            agent_type: Type of agent to filter

        Returns:
            List of agents of the specified type
        """
        with self._registry_lock:
            return [
                a for a in self._agents.values()
                if a.config.agent_type == agent_type
            ]

    def list_by_status(self, status: AgentStatus) -> List[Agent]:
        """
        List all agents with a specific status.

        Args:
            status: Status to filter

        Returns:
            List of agents with the specified status
        """
        with self._registry_lock:
            return [a for a in self._agents.values() if a.status == status]

    def update_status(self, agent_id: str, status: AgentStatus) -> bool:
        """
        Update agent status.

        Args:
            agent_id: Agent ID
            status: New status

        Returns:
            True if updated, False if agent not found
        """
        with self._registry_lock:
            agent = self._agents.get(agent_id)
            if agent:
                agent.status = status
                return True
            return False

    def remove(self, agent_id: str) -> bool:
        """
        Remove agent from registry (and from both secondary indexes).

        Args:
            agent_id: Agent ID

        Returns:
            True if removed, False if not found
        """
        with self._registry_lock:
            agent = self._agents.pop(agent_id, None)
            if agent is None:
                return False

            # Remove from user tracking (None-check so user_id 0 is handled).
            if agent.user_id is not None and agent.user_id in self._user_agents:
                self._user_agents[agent.user_id].discard(agent_id)

            # Remove from conversation tracking
            if agent.conversation_id and agent.conversation_id in self._conversation_agents:
                self._conversation_agents[agent.conversation_id].discard(agent_id)

            return True

    def remove_user_agents(self, user_id: int) -> int:
        """
        Remove all agents for a user.

        Args:
            user_id: User ID

        Returns:
            Number of agents removed
        """
        with self._registry_lock:
            agent_ids = self._user_agents.pop(user_id, set())
            count = 0
            for agent_id in agent_ids:
                if agent_id in self._agents:
                    del self._agents[agent_id]
                    count += 1
            return count

    def remove_conversation_agents(self, conversation_id: str) -> int:
        """
        Remove all agents in a conversation.

        Args:
            conversation_id: Conversation ID

        Returns:
            Number of agents removed
        """
        with self._registry_lock:
            agent_ids = self._conversation_agents.pop(conversation_id, set())
            count = 0
            for agent_id in agent_ids:
                if agent_id in self._agents:
                    del self._agents[agent_id]
                    count += 1
            return count

    def clear(self) -> None:
        """Clear all agents and both secondary indexes."""
        with self._registry_lock:
            self._agents.clear()
            self._user_agents.clear()
            self._conversation_agents.clear()

    @property
    def agent_count(self) -> int:
        """Total number of registered agents."""
        with self._registry_lock:
            return len(self._agents)

    def get_stats(self) -> Dict:
        """Get registry statistics: total plus per-status and per-type counts."""
        with self._registry_lock:
            status_counts = {}
            type_counts = {}
            for agent in self._agents.values():
                status_counts[agent.status.value] = status_counts.get(agent.status.value, 0) + 1
                type_counts[agent.config.agent_type.value] = type_counts.get(agent.config.agent_type.value, 0) + 1

            return {
                "total": len(self._agents),
                "by_status": status_counts,
                "by_type": type_counts,
            }
|
||||
|
||||
|
||||
# Global registry instance — module-level singleton shared by the application.
registry = AgentRegistry()
|
||||
|
|
@ -0,0 +1,345 @@
|
|||
"""Supervisor Agent - task decomposition and result integration"""
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
|
||||
from luxx.agents.core import Agent, AgentConfig, AgentType, AgentStatus
|
||||
from luxx.agents.dag import DAG, TaskNode, TaskNodeStatus, TaskResult
|
||||
from luxx.services.llm_client import llm_client
|
||||
from luxx.tools.core import registry as tool_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SupervisorAgent:
    """
    Supervisor Agent

    Responsible for:
    - Task decomposition using LLM
    - Generating DAG (task graph)
    - Result integration from workers
    """

    # System prompt for task decomposition
    DEFAULT_SYSTEM_PROMPT = """You are a Supervisor Agent that decomposes complex tasks into executable subtasks.

Your responsibilities:
1. Analyze the user's task and break it down into smaller, manageable subtasks
2. Create a DAG (Directed Acyclic Graph) where nodes are subtasks and edges represent dependencies
3. Each subtask should be specific and actionable
4. Consider parallel execution opportunities - tasks without dependencies can run concurrently
5. Store key results from subtasks for final integration

Output format for task decomposition:
{
    "task_name": "Overall task name",
    "task_description": "Description of what needs to be accomplished",
    "nodes": [
        {
            "id": "task_001",
            "name": "Task name",
            "description": "What this task does",
            "task_type": "code|shell|file|llm|generic",
            "task_data": {...}, # Task-specific parameters
            "dependencies": [] # IDs of tasks that must complete first
        }
    ]
}

Guidelines:
- Keep tasks focused and atomic
- Use meaningful task IDs (e.g., task_001, task_002)
- Mark parallelizable tasks with no dependencies
- Maximum 10 subtasks for a single decomposition
- Include only the output_data that matters for dependent tasks or final result
"""

    def __init__(
        self,
        agent: Agent,
        llm_client=None,
        max_subtasks: int = 10
    ):
        """
        Initialize Supervisor Agent.

        Args:
            agent: Agent instance (should be SUPERVISOR type)
            llm_client: LLM client instance; defaults to the shared module-level client
            max_subtasks: Maximum number of subtasks to generate
        """
        self.agent = agent
        # BUG FIX: the parameter shadows the module-level `llm_client` import,
        # so the original `llm_client or llm_client` always resolved to the
        # parameter and could never fall back to the shared client.
        if llm_client is None:
            from luxx.services.llm_client import llm_client as _shared_llm_client
            llm_client = _shared_llm_client
        self.llm_client = llm_client
        self.max_subtasks = max_subtasks

        # Ensure agent has supervisor system prompt
        if not self.agent.config.system_prompt:
            self.agent.config.system_prompt = self.DEFAULT_SYSTEM_PROMPT

    async def decompose_task(
        self,
        task: str,
        context: Dict[str, Any],
        progress_callback: Optional[Callable] = None
    ) -> DAG:
        """
        Decompose a task into subtasks using the LLM.

        Args:
            task: User's task description
            context: Execution context (workspace, user info, etc.)
                NOTE(review): currently unused in this method — confirm intent.
            progress_callback: Optional callback(progress: float, message: str)

        Returns:
            DAG representing the task decomposition

        Raises:
            Exception: Re-raises any LLM-call failure after marking the
                agent FAILED.
        """
        self.agent.status = AgentStatus.PLANNING

        if progress_callback:
            progress_callback(0.1, "Analyzing task...")

        # Build messages for LLM from the agent's sliding-window context.
        messages = self.agent.get_context()
        messages.append({
            "role": "user",
            "content": f"Decompose this task into subtasks:\n{task}"
        })

        if progress_callback:
            progress_callback(0.2, "Calling LLM for task decomposition...")

        # Call LLM
        try:
            response = await self.llm_client.sync_call(
                model=self.agent.config.model,
                messages=messages,
                temperature=self.agent.config.temperature,
                max_tokens=self.agent.config.max_tokens
            )

            if progress_callback:
                progress_callback(0.5, "Processing decomposition...")

            # Parse LLM response to extract DAG
            dag = self._parse_dag_from_response(response.content, task)

            # Add assistant response to context
            self.agent.add_message("assistant", response.content)

            if progress_callback:
                progress_callback(0.9, "Task decomposition complete")

            self.agent.status = AgentStatus.IDLE
            return dag

        except Exception as e:
            logger.error(f"Task decomposition failed: {e}")
            self.agent.status = AgentStatus.FAILED
            raise

    def _parse_dag_from_response(self, content: str, original_task: str) -> DAG:
        """
        Parse LLM response to extract a DAG structure.

        Falls back to a single llm-type node wrapping the whole task when
        the response contains no parseable JSON.

        Args:
            content: LLM response content
            original_task: Original task description

        Returns:
            DAG instance
        """
        # Try to extract JSON from response
        dag_data = self._extract_json(content)

        if not dag_data:
            # Fallback: create a simple single-node DAG
            logger.warning("Could not parse DAG from LLM response, creating simple DAG")
            dag = DAG(
                id=f"dag_{self.agent.id}",
                name=original_task[:50],
                description=original_task
            )
            node = TaskNode(
                id="task_001",
                name="Execute Task",
                description=original_task,
                task_type="llm",
                task_data={"prompt": original_task}
            )
            dag.add_node(node)
            return dag

        # Build DAG from parsed data
        dag = DAG(
            id=f"dag_{self.agent.id}",
            name=dag_data.get("task_name", original_task[:50]),
            description=dag_data.get("task_description", original_task)
        )

        # Add nodes first so every edge endpoint exists.
        for node_data in dag_data.get("nodes", []):
            node = TaskNode(
                id=node_data["id"],
                name=node_data["name"],
                description=node_data.get("description", ""),
                task_type=node_data.get("task_type", "generic"),
                task_data=node_data.get("task_data", {})
            )
            dag.add_node(node)

        # Add edges based on dependencies; silently skip unknown dep IDs.
        for node_data in dag_data.get("nodes", []):
            node_id = node_data["id"]
            for dep_id in node_data.get("dependencies", []):
                if dep_id in dag.nodes:
                    dag.add_edge(dep_id, node_id)

        return dag

    def _extract_json(self, content: str) -> Optional[Dict]:
        """
        Extract a JSON object from an LLM response.

        Tries a ```json fenced block first, then any brace-delimited span.

        Args:
            content: Raw LLM response

        Returns:
            Parsed JSON dict or None
        """
        import re

        # Look for ```json ... ``` blocks
        json_match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", content, re.DOTALL)
        if json_match:
            try:
                return json.loads(json_match.group(1))
            except json.JSONDecodeError:
                pass

        # Look for raw JSON object
        json_match = re.search(r"\{.*\}", content, re.DOTALL)
        if json_match:
            try:
                return json.loads(json_match.group(0))
            except json.JSONDecodeError:
                pass

        return None

    def integrate_results(self, dag: DAG) -> Dict[str, Any]:
        """
        Integrate results from all completed tasks.

        Args:
            dag: DAG with completed tasks

        Returns:
            Summary dict with per-node results and completion counts
        """
        self.agent.status = AgentStatus.EXECUTING

        # Collect all output data from completed nodes
        results = {}
        for node in dag.nodes.values():
            if node.status == TaskNodeStatus.COMPLETED and node.output_data:
                results[node.id] = node.output_data

        # Store aggregated results on the agent for later inspection.
        self.agent.accumulated_result = results
        self.agent.status = AgentStatus.COMPLETED

        return {
            "success": True,
            "dag_id": dag.id,
            "total_tasks": dag.total_count,
            "completed_tasks": dag.completed_count,
            "failed_tasks": dag.failed_count,
            "results": results
        }

    async def review_and_refine(
        self,
        dag: DAG,
        task: str,
        progress_callback: Optional[Callable] = None
    ) -> Optional[DAG]:
        """
        Review DAG execution and refine it if tasks failed.

        Args:
            dag: Current DAG state
            task: Original task
            progress_callback: Progress callback (currently unused here)

        Returns:
            Refined DAG, or None if no refinement is needed or refinement failed
        """
        if dag.is_success:
            return None  # No refinement needed

        # Check if there are failed tasks
        failed_nodes = [n for n in dag.nodes.values() if n.status == TaskNodeStatus.FAILED]

        if not failed_nodes:
            return None

        # Build context for refinement
        context = {
            "original_task": task,
            "failed_tasks": [
                {
                    "id": n.id,
                    "name": n.name,
                    "error": n.result.error if n.result else "Unknown error"
                }
                for n in failed_nodes
            ],
            "completed_tasks": [
                {
                    "id": n.id,
                    "name": n.name,
                    "output": n.output_data
                }
                for n in dag.nodes.values() if n.status == TaskNodeStatus.COMPLETED
            ]
        }

        messages = self.agent.get_context()
        messages.append({
            "role": "user",
            "content": f"""Review the task execution and suggest refinements:

Task: {task}

Failed tasks: {json.dumps(context['failed_tasks'], indent=2)}

Completed tasks: {json.dumps(context['completed_tasks'], indent=2)}

If a task failed, you can:
1. Break it into smaller tasks
2. Change the approach
3. Skip it if not critical

Provide a refined subtask plan if needed, or indicate if the overall task should fail."""
        })

        try:
            response = await self.llm_client.sync_call(
                model=self.agent.config.model,
                messages=messages,
                temperature=self.agent.config.temperature
            )

            # Check if refinement was suggested
            refined_dag = self._parse_dag_from_response(response.content, task)

            # Only return if we got a valid refinement
            if refined_dag and refined_dag.nodes:
                return refined_dag

        except Exception as e:
            # Best-effort: a failed refinement is not fatal; caller gets None.
            logger.error(f"DAG refinement failed: {e}")

        return None
|
||||
|
|
@ -0,0 +1,401 @@
|
|||
"""Worker Agent - executes specific tasks"""
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
|
||||
from luxx.agents.core import Agent, AgentConfig, AgentType, AgentStatus
|
||||
from luxx.agents.dag import TaskNode, TaskNodeStatus, TaskResult
|
||||
from luxx.services.llm_client import llm_client
|
||||
from luxx.tools.core import registry as tool_registry, ToolContext, CommandPermission
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WorkerAgent:
    """
    Worker Agent

    Responsible for executing specific tasks using:
    - LLM calls for reasoning tasks
    - Tool execution for actionable tasks

    Follows sliding window context management.
    """

    # System prompt for worker tasks
    DEFAULT_SYSTEM_PROMPT = """You are a Worker Agent that executes specific tasks efficiently.

Your responsibilities:
1. Execute tasks assigned to you by the Supervisor
2. Use appropriate tools when needed
3. Report results clearly with structured output_data for dependent tasks
4. Be concise and focused on the task at hand

Output format:
- Provide clear, structured results
- Include output_data for any data that dependent tasks might need
- If a tool fails, explain the error clearly
"""

    def __init__(
        self,
        agent: Agent,
        llm_client=None,
        tool_executor=None
    ):
        """
        Initialize Worker Agent.

        Args:
            agent: Agent instance (should be WORKER type)
            llm_client: LLM client instance; defaults to the shared module-level client
            tool_executor: Tool executor instance (required for tool-calling tasks)
        """
        self.agent = agent
        # BUG FIX: the parameter shadows the module-level `llm_client` import,
        # so the original `llm_client or llm_client` always resolved to the
        # parameter and could never fall back to the shared client.
        if llm_client is None:
            from luxx.services.llm_client import llm_client as _shared_llm_client
            llm_client = _shared_llm_client
        self.llm_client = llm_client
        self.tool_executor = tool_executor

        # Ensure agent has worker system prompt
        if not self.agent.config.system_prompt:
            self.agent.config.system_prompt = self.DEFAULT_SYSTEM_PROMPT

    async def execute_task(
        self,
        task_node: TaskNode,
        context: Dict[str, Any],
        parent_outputs: Optional[Dict[str, Dict[str, Any]]] = None,
        progress_callback: Optional[Callable] = None
    ) -> TaskResult:
        """
        Execute a task node, dispatching on its task_type.

        Args:
            task_node: Task node to execute
            context: Execution context (workspace, user info, etc.)
            parent_outputs: Output data from parent tasks (dependency results)
            progress_callback: Optional callback(progress: float, message: str)

        Returns:
            TaskResult with execution outcome (execution_time is always set)
        """
        self.agent.status = AgentStatus.EXECUTING
        self.agent.current_task_id = task_node.id

        start_time = time.time()

        if progress_callback:
            progress_callback(0.0, f"Starting task: {task_node.name}")

        try:
            # Merge parent outputs into context
            execution_context = self._prepare_context(context, parent_outputs)

            # Dispatch on task type; unknown types fall through to generic.
            if task_node.task_type == "llm":
                result = await self._execute_llm_task(task_node, execution_context, progress_callback)
            elif task_node.task_type == "code":
                result = await self._execute_code_task(task_node, execution_context, progress_callback)
            elif task_node.task_type == "shell":
                result = await self._execute_shell_task(task_node, execution_context, progress_callback)
            elif task_node.task_type == "file":
                result = await self._execute_file_task(task_node, execution_context, progress_callback)
            else:
                result = await self._execute_generic_task(task_node, execution_context, progress_callback)

            result.execution_time = time.time() - start_time

            if progress_callback:
                progress_callback(1.0, f"Task complete: {task_node.name}")

            self.agent.status = AgentStatus.IDLE
            return result

        except Exception as e:
            logger.error(f"Task execution failed: {e}")
            self.agent.status = AgentStatus.FAILED
            # BUG FIX: the original computed execution_time here but never
            # attached it to the failure result.
            failure = TaskResult.fail(error=str(e))
            failure.execution_time = time.time() - start_time
            return failure

    def _prepare_context(
        self,
        context: Dict[str, Any],
        parent_outputs: Optional[Dict[str, Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Prepare execution context by merging parent outputs.

        Args:
            context: Base context
            parent_outputs: Output from parent tasks

        Returns:
            Merged context (the input dict is shallow-copied, not mutated)
        """
        execution_context = context.copy()

        if parent_outputs:
            # Flatten all parent outputs; later parents overwrite earlier keys.
            merged = {}
            for parent_id, outputs in parent_outputs.items():
                merged.update(outputs)
            execution_context["parent_outputs"] = parent_outputs
            execution_context["merged_data"] = merged

        # Add user permission level from the agent if the caller didn't set it.
        if "user_permission_level" not in execution_context:
            execution_context["user_permission_level"] = self.agent.effective_permission.value

        return execution_context

    async def _execute_llm_task(
        self,
        task_node: TaskNode,
        context: Dict[str, Any],
        progress_callback: Optional[Callable] = None
    ) -> TaskResult:
        """Execute an LLM reasoning task (no tools, single call)."""
        task_data = task_node.task_data

        # Build prompt
        prompt = task_data.get("prompt", task_node.description)
        system_prompt = task_data.get("system", self.agent.config.system_prompt)

        messages = [{"role": "system", "content": system_prompt}]

        # Add parent data if available
        if "merged_data" in context:
            merged = context["merged_data"]
            context_info = "\n".join([f"{k}: {v}" for k, v in merged.items()])
            messages.append({
                "role": "system",
                "content": f"Context from dependent tasks:\n{context_info}"
            })

        messages.append({"role": "user", "content": prompt})

        if progress_callback:
            progress_callback(0.3, "Calling LLM...")

        try:
            response = await self.llm_client.sync_call(
                model=self.agent.config.model,
                messages=messages,
                temperature=self.agent.config.temperature,
                max_tokens=self.agent.config.max_tokens
            )

            return TaskResult.ok(
                data=response.content,
                # Copy so dependent tasks can't mutate the node's template.
                output_data=task_node.task_data.get("output_template", {}).copy()
            )

        except Exception as e:
            return TaskResult.fail(error=str(e))

    async def _execute_code_task(
        self,
        task_node: TaskNode,
        context: Dict[str, Any],
        progress_callback: Optional[Callable] = None
    ) -> TaskResult:
        """Execute a code generation/writing task via the LLM."""
        task_data = task_node.task_data

        # Build prompt for code generation
        prompt = task_data.get("prompt", task_node.description)
        language = task_data.get("language", "python")
        requirements = task_data.get("requirements", "")

        messages = [
            {"role": "system", "content": f"You are a {language} programmer. Write clean, efficient code."},
            {"role": "user", "content": f"Task: {prompt}\n\nRequirements: {requirements}"}
        ]

        if progress_callback:
            progress_callback(0.3, "Generating code...")

        try:
            response = await self.llm_client.sync_call(
                model=self.agent.config.model,
                messages=messages,
                temperature=0.2,  # Lower temp for code
                max_tokens=4096
            )

            return TaskResult.ok(
                data=response.content,
                output_data={
                    "code": response.content,
                    "language": language
                }
            )

        except Exception as e:
            return TaskResult.fail(error=str(e))

    async def _execute_shell_task(
        self,
        task_node: TaskNode,
        context: Dict[str, Any],
        progress_callback: Optional[Callable] = None
    ) -> TaskResult:
        """Execute a shell command task through the shell_exec tool."""
        task_data = task_node.task_data

        command = task_data.get("command")
        if not command:
            return TaskResult.fail(error="No command specified")

        if progress_callback:
            progress_callback(0.3, f"Executing: {command[:50]}...")

        # Build tool context carrying workspace/user identity and permission.
        tool_ctx = ToolContext(
            workspace=context.get("workspace"),
            user_id=context.get("user_id"),
            username=context.get("username"),
            extra={"user_permission_level": context.get("user_permission_level", 1)}
        )

        try:
            # Execute shell command via tool
            result = tool_registry.execute(
                "shell_exec",
                {"command": command},
                context=tool_ctx
            )

            if result.get("success"):
                return TaskResult.ok(
                    data=result.get("data", {}).get("output", ""),
                    output_data={"output": result.get("data", {}).get("output", "")}
                )
            else:
                return TaskResult.fail(error=result.get("error", "Shell execution failed"))

        except Exception as e:
            return TaskResult.fail(error=str(e))

    async def _execute_file_task(
        self,
        task_node: TaskNode,
        context: Dict[str, Any],
        progress_callback: Optional[Callable] = None
    ) -> TaskResult:
        """Execute a file operation task via the file_<operation> tool."""
        task_data = task_node.task_data

        operation = task_data.get("operation")
        file_path = task_data.get("path")
        content = task_data.get("content", "")

        if not operation or not file_path:
            return TaskResult.fail(error="Missing operation or path")

        if progress_callback:
            progress_callback(0.3, f"File operation: {operation} {file_path}")

        tool_ctx = ToolContext(
            workspace=context.get("workspace"),
            user_id=context.get("user_id"),
            username=context.get("username"),
            extra={"user_permission_level": context.get("user_permission_level", 1)}
        )

        try:
            # Tool name is derived from the operation, e.g. file_read/file_write.
            tool_name = f"file_{operation}"
            result = tool_registry.execute(
                tool_name,
                {"path": file_path, "content": content},
                context=tool_ctx
            )

            if result.get("success"):
                return TaskResult.ok(
                    data=result.get("data"),
                    output_data={"path": file_path, "operation": operation}
                )
            else:
                return TaskResult.fail(error=result.get("error", "File operation failed"))

        except Exception as e:
            return TaskResult.fail(error=str(e))

    async def _execute_generic_task(
        self,
        task_node: TaskNode,
        context: Dict[str, Any],
        progress_callback: Optional[Callable] = None
    ) -> TaskResult:
        """
        Execute a generic task using the LLM with optional tool calling.

        Loops LLM call -> tool execution up to a fixed number of iterations;
        the task completes on the first response with no tool calls.
        """
        task_data = task_node.task_data

        # Build prompt
        prompt = task_data.get("prompt", task_node.description)
        tools = task_data.get("tools", [])

        messages = [
            {"role": "system", "content": self.agent.config.system_prompt},
            {"role": "user", "content": prompt}
        ]

        # Get tool definitions if specified; unknown tool names are skipped.
        tool_defs = None
        if tools:
            tool_defs = [tool_registry.get(t).to_openai_format() for t in tools if tool_registry.get(t)]

        if progress_callback:
            progress_callback(0.2, "Processing task...")

        max_iterations = 5
        iteration = 0

        while iteration < max_iterations:
            try:
                response = await self.llm_client.sync_call(
                    model=self.agent.config.model,
                    messages=messages,
                    tools=tool_defs,
                    temperature=self.agent.config.temperature,
                    max_tokens=self.agent.config.max_tokens
                )

                # Add assistant response
                messages.append({"role": "assistant", "content": response.content})

                # Check for tool calls
                if response.tool_calls:
                    if progress_callback:
                        progress_callback(0.5, f"Executing {len(response.tool_calls)} tools...")

                    # Execute tools (raises AttributeError -> TaskResult.fail
                    # if no tool_executor was supplied at construction).
                    tool_results = self.tool_executor.process_tool_calls(
                        response.tool_calls,
                        context
                    )

                    # Feed tool results back into the conversation.
                    for tr in tool_results:
                        messages.append({
                            "role": "tool",
                            "tool_call_id": tr["tool_call_id"],
                            "content": tr["content"]
                        })

                    if progress_callback:
                        progress_callback(0.8, "Tools executed")
                else:
                    # No tool calls, task complete
                    return TaskResult.ok(
                        data=response.content,
                        output_data=task_data.get("output_template", {})
                    )

            except Exception as e:
                return TaskResult.fail(error=str(e))

            iteration += 1

        return TaskResult.fail(error="Max iterations exceeded")
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
from fastapi import APIRouter
|
||||
|
||||
from luxx.routes import auth, conversations, messages, tools, providers
|
||||
from luxx.routes.agents_ws import router as agents_ws_router
|
||||
|
||||
|
||||
api_router = APIRouter()
|
||||
|
|
@ -12,3 +13,4 @@ api_router.include_router(conversations.router)
|
|||
api_router.include_router(messages.router)
|
||||
api_router.include_router(tools.router)
|
||||
api_router.include_router(providers.router)
|
||||
api_router.include_router(agents_ws_router)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,385 @@
|
|||
"""WebSocket routes for agent real-time communication"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
from typing import Any, Dict, Optional, Set
|
||||
from datetime import datetime
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends
|
||||
import uuid
|
||||
|
||||
from luxx.agents.core import Agent, AgentConfig, AgentType, AgentStatus
|
||||
from luxx.agents.dag import DAG, TaskNode, TaskNodeStatus, TaskResult
|
||||
from luxx.agents.supervisor import SupervisorAgent
|
||||
from luxx.agents.worker import WorkerAgent
|
||||
from luxx.agents.dag_scheduler import DAGScheduler, SchedulerPool
|
||||
from luxx.agents.registry import AgentRegistry
|
||||
from luxx.services.llm_client import llm_client
|
||||
from luxx.tools.executor import ToolExecutor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ConnectionManager:
    """
    WebSocket Connection Manager

    Manages WebSocket connections for real-time agent progress updates.

    Features:
    - Connection tracking by task_id
    - Heartbeat mechanism
    - Progress broadcasting

    NOTE: callers must ``await websocket.accept()`` before registering a
    connection with :meth:`connect`. ``connect`` is synchronous and cannot
    perform the handshake itself (the previous version called
    ``websocket.accept()`` without awaiting it, so the coroutine never ran).
    """

    def __init__(self):
        # task_id -> set of websocket connections subscribed to that task
        self._connections: Dict[str, Set[WebSocket]] = {}
        # reverse mapping: websocket -> task_id it is subscribed to
        self._ws_to_task: Dict[WebSocket, str] = {}
        # per-connection heartbeat tasks, cancelled on disconnect
        self._heartbeat_tasks: Dict[WebSocket, asyncio.Task] = {}
        # protects the three dicts above; never held across an await
        self._lock = threading.Lock()
        # heartbeat interval in seconds
        self._heartbeat_interval = 30

    def connect(self, websocket: WebSocket, task_id: str) -> None:
        """
        Register an already-accepted WebSocket connection.

        The caller is responsible for awaiting ``websocket.accept()`` first;
        this method only records the subscription.

        Args:
            websocket: WebSocket connection
            task_id: Task ID to subscribe to
        """
        with self._lock:
            self._connections.setdefault(task_id, set()).add(websocket)
            self._ws_to_task[websocket] = task_id

        logger.info(f"WebSocket connected for task {task_id}")

    def disconnect(self, websocket: WebSocket) -> None:
        """
        Unregister a WebSocket connection and stop its heartbeat.

        Safe to call repeatedly for the same websocket (idempotent).

        Args:
            websocket: WebSocket connection
        """
        with self._lock:
            task_id = self._ws_to_task.pop(websocket, None)
            if task_id and task_id in self._connections:
                self._connections[task_id].discard(websocket)
                if not self._connections[task_id]:
                    del self._connections[task_id]

            # Cancel heartbeat task
            if websocket in self._heartbeat_tasks:
                self._heartbeat_tasks[websocket].cancel()
                del self._heartbeat_tasks[websocket]

        if task_id:
            logger.info(f"WebSocket disconnected for task {task_id}")

    async def send_to_task(self, task_id: str, message: Dict[str, Any]) -> None:
        """
        Send message to all connections subscribed to a task.

        Connections that fail to receive are unregistered.

        Args:
            task_id: Task ID
            message: Message to send
        """
        # Snapshot under the lock; never await while holding it.
        with self._lock:
            connections = self._connections.get(task_id, set()).copy()

        if not connections:
            return

        dead_connections = set()

        for websocket in connections:
            try:
                await websocket.send_json(message)
            except Exception as e:
                logger.warning(f"Failed to send to websocket: {e}")
                dead_connections.add(websocket)

        # Clean up dead connections (disconnect re-acquires the lock,
        # which we no longer hold here)
        for ws in dead_connections:
            self.disconnect(ws)

    async def broadcast(self, message: Dict[str, Any]) -> None:
        """
        Broadcast message to all connections.

        Args:
            message: Message to broadcast
        """
        # Snapshot under the lock; the sends happen outside it so a slow or
        # blocked client cannot stall connect/disconnect on other threads.
        with self._lock:
            all_connections = list(self._ws_to_task.keys())

        for websocket in all_connections:
            try:
                await websocket.send_json(message)
            except Exception as e:
                logger.warning(f"Failed to broadcast: {e}")

    async def send_personal(self, websocket: WebSocket, message: Dict[str, Any]) -> None:
        """
        Send message to a specific connection; failures are logged, not raised.

        Args:
            websocket: Target WebSocket
            message: Message to send
        """
        try:
            await websocket.send_json(message)
        except Exception as e:
            logger.warning(f"Failed to send personal message: {e}")

    def start_heartbeat(self, websocket: WebSocket) -> None:
        """
        Start a periodic heartbeat for a connection.

        Must be called from a running event loop (uses asyncio.create_task).
        The task is cancelled by :meth:`disconnect`.

        Args:
            websocket: WebSocket connection
        """
        async def heartbeat_loop():
            while True:
                await asyncio.sleep(self._heartbeat_interval)
                try:
                    await websocket.send_json({
                        "type": "heartbeat",
                        "interval": self._heartbeat_interval
                    })
                except Exception:
                    # Send failed: peer is gone, stop heartbeating.
                    # (CancelledError is a BaseException and still propagates.)
                    break

        task = asyncio.create_task(heartbeat_loop())
        with self._lock:
            self._heartbeat_tasks[websocket] = task

    @property
    def connection_count(self) -> int:
        """Total number of connections"""
        with self._lock:
            return len(self._ws_to_task)

    def get_task_connections(self, task_id: str) -> int:
        """Get number of connections for a task"""
        with self._lock:
            return len(self._connections.get(task_id, set()))
|
||||
|
||||
|
||||
# Global connection manager — module-level singleton shared by all
# websocket routes; tracks live subscriptions per task_id.
connection_manager = ConnectionManager()

# Global scheduler pool — runs at most 10 DAG schedulers concurrently.
scheduler_pool = SchedulerPool(max_concurrent=10)

# Global tool executor — shared ToolExecutor instance used to run the
# tool calls issued by agents.
tool_executor = ToolExecutor()
|
||||
|
||||
|
||||
class ProgressEmitter:
    """
    Progress emitter that sends updates via WebSocket

    Wraps DAGScheduler progress callback to emit WebSocket messages.

    Instances are callable as ``(node_id, progress, message)``. The special
    node_id ``"dag"`` marks DAG-level progress; any other id is node-level.
    Must be invoked from a running asyncio event loop, since delivery is
    scheduled with ``asyncio.create_task``.
    """

    def __init__(self, task_id: str, connection_manager: "ConnectionManager"):
        self.task_id = task_id
        self.connection_manager = connection_manager
        # Strong references to in-flight send tasks: asyncio only keeps a
        # weak reference to tasks, so an unreferenced task may be garbage
        # collected before it runs.
        self._pending: Set[asyncio.Task] = set()

    def __call__(self, node_id: str, progress: float, message: str = "") -> None:
        """Progress callback: fan the update out to the task's subscribers."""
        if node_id == "dag":
            # DAG-level progress
            payload = {
                "type": "dag_progress",
                "data": {
                    "progress": progress,
                    "message": message
                }
            }
        else:
            # Node-level progress
            payload = {
                "type": "node_progress",
                "data": {
                    "node_id": node_id,
                    "progress": progress,
                    "message": message
                }
            }

        # Single scheduling point instead of duplicating create_task per branch
        task = asyncio.create_task(
            self.connection_manager.send_to_task(self.task_id, payload)
        )
        self._pending.add(task)
        task.add_done_callback(self._pending.discard)
|
||||
|
||||
|
||||
@router.websocket("/ws/dag/{task_id}")
async def dag_websocket(websocket: WebSocket, task_id: str):
    """
    WebSocket endpoint for DAG progress updates

    Protocol:
    - Client sends: subscribe, get_status, ping, cancel_task
    - Server sends: subscribed, heartbeat, dag_start, node_start, node_progress,
      node_complete, node_error, dag_complete, pong
    """
    # Complete the WebSocket handshake. accept() is a coroutine and must be
    # awaited here -- the synchronous ConnectionManager.connect cannot do
    # the handshake itself (its unawaited accept() call never runs).
    await websocket.accept()

    # Register the connection for task-scoped broadcasts
    connection_manager.connect(websocket, task_id)

    # Send subscribed confirmation
    await connection_manager.send_personal(websocket, {
        "type": "subscribed",
        "task_id": task_id
    })

    # Start heartbeat
    connection_manager.start_heartbeat(websocket)

    # If the task is already running, push the current DAG state immediately
    scheduler = scheduler_pool.get(task_id)
    if scheduler:
        await connection_manager.send_personal(websocket, {
            "type": "dag_status",
            "data": scheduler.dag.to_dict()
        })

    try:
        while True:
            # Receive message
            data = await websocket.receive_text()

            try:
                message = json.loads(data)
            except json.JSONDecodeError:
                await connection_manager.send_personal(websocket, {
                    "type": "error",
                    "message": "Invalid JSON"
                })
                continue

            msg_type = message.get("type")

            if msg_type == "subscribe":
                # Already subscribed on connect; just acknowledge again
                await connection_manager.send_personal(websocket, {
                    "type": "subscribed",
                    "task_id": task_id
                })

            elif msg_type == "get_status":
                # Re-fetch: the scheduler may have appeared/gone since connect
                scheduler = scheduler_pool.get(task_id)
                if scheduler:
                    await connection_manager.send_personal(websocket, {
                        "type": "dag_status",
                        "data": scheduler.dag.to_dict()
                    })
                else:
                    await connection_manager.send_personal(websocket, {
                        "type": "dag_status",
                        "data": None
                    })

            elif msg_type == "ping":
                await connection_manager.send_personal(websocket, {
                    "type": "pong"
                })

            elif msg_type == "cancel_task":
                if scheduler_pool.cancel(task_id):
                    await connection_manager.send_personal(websocket, {
                        "type": "task_cancelled",
                        "task_id": task_id
                    })
                else:
                    await connection_manager.send_personal(websocket, {
                        "type": "error",
                        "message": "Task not found or already completed"
                    })

            else:
                await connection_manager.send_personal(websocket, {
                    "type": "error",
                    "message": f"Unknown message type: {msg_type}"
                })

    except WebSocketDisconnect:
        # Normal client close
        pass
    except Exception as e:
        logger.error(f"WebSocket error: {e}")
    finally:
        # Always unregister, whatever path exits the loop
        connection_manager.disconnect(websocket)
|
||||
|
||||
|
||||
# Helper functions to emit progress from scheduler
|
||||
|
||||
def create_progress_emitter(task_id: str) -> ProgressEmitter:
    """Build a ProgressEmitter bound to *task_id* and the module-wide
    connection manager."""
    emitter = ProgressEmitter(task_id, connection_manager)
    return emitter
|
||||
|
||||
|
||||
async def emit_dag_start(task_id: str, dag: DAG) -> None:
    """Notify all subscribers of *task_id* that DAG execution has begun."""
    event = {"type": "dag_start", "data": {"graph": dag.to_dict()}}
    await connection_manager.send_to_task(task_id, event)
|
||||
|
||||
|
||||
async def emit_node_start(task_id: str, node: TaskNode) -> None:
    """Notify subscribers of *task_id* that *node* has started executing."""
    details = {
        "node_id": node.id,
        "name": node.name,
        "status": node.status.value,
    }
    await connection_manager.send_to_task(task_id, {"type": "node_start", "data": details})
|
||||
|
||||
|
||||
async def emit_node_complete(task_id: str, node: TaskNode) -> None:
    """Notify subscribers that *node* finished, including its result payload."""
    result_dict = node.result.to_dict() if node.result else None
    details = {
        "node_id": node.id,
        "result": result_dict,
        "output_data": node.output_data,
    }
    await connection_manager.send_to_task(task_id, {"type": "node_complete", "data": details})
|
||||
|
||||
|
||||
async def emit_node_error(task_id: str, node: TaskNode, error: str) -> None:
    """Notify subscribers that *node* failed with the given *error* text."""
    await connection_manager.send_to_task(
        task_id,
        {"type": "node_error", "data": {"node_id": node.id, "error": error}},
    )
|
||||
|
||||
|
||||
async def emit_dag_complete(task_id: str, success: bool, results: Dict) -> None:
    """Notify subscribers that the whole DAG finished (success flag + results)."""
    summary = {"success": success, "results": results}
    await connection_manager.send_to_task(task_id, {"type": "dag_complete", "data": summary})
|
||||
Loading…
Reference in New Issue