""" Agent lifecycle manager — registry for background spawn_agent and aider_run tasks. Tracks running and recently completed agents in-process. On completion, fires notification.notify() if notify=True (same channel used by reminders and cron jobs). Records are kept for 24 hours after completion, then pruned on next registration. """ import asyncio import logging import uuid from dataclasses import dataclass, field from datetime import datetime, timedelta logger = logging.getLogger(__name__) _PRUNE_AFTER = timedelta(hours=24) _RESULT_PREVIEW_CHARS = 500 _TASK_PREVIEW_CHARS = 200 @dataclass class AgentRecord: agent_id: str level: int # 1 = persona, 2 = specialized sub-agent, 3 = support agent role: str # e.g. "coder", "research", "chat" task: str # first _TASK_PREVIEW_CHARS of the task status: str # running / done / failed / cancelled / timeout started: datetime user: str parent_id: str | None = None # agent_id of the spawner (lineage tracking) finished: datetime | None = None result: str | None = None # first _RESULT_PREVIEW_CHARS on completion notify: bool = False # push notification on completion _task_ref: "asyncio.Task | None" = field(default=None, repr=False) # Module-level registry — in-process only, not persisted across restarts. _agents: dict[str, AgentRecord] = {} _lock = asyncio.Lock() async def register( user: str, role: str, task: str, level: int = 2, parent_id: str | None = None, notify: bool = False, ) -> AgentRecord: """Create and register a new running agent. Returns the record (agent_id is set).""" agent_id = str(uuid.uuid4()) rec = AgentRecord( agent_id=agent_id, level=level, role=role, task=task[:_TASK_PREVIEW_CHARS], status="running", started=datetime.now(), user=user, parent_id=parent_id, notify=notify, ) async with _lock: _prune_locked() _agents[agent_id] = rec logger.info( "agent_manager: registered %s role=%s level=%d user=%s task=%.60s", agent_id[:8], role, level, user, task, ) return rec def set_task_ref(agent_id: str, task_ref: "asyncio.Task") -> None: """Store the asyncio.Task reference so it can be cancelled later. Call immediately after asyncio.create_task() — before the event loop yields. """ rec = _agents.get(agent_id) if rec: rec._task_ref = task_ref async def finish(agent_id: str, result: str, status: str = "done") -> None: """Mark an agent complete, store the result, and notify the user if requested.""" async with _lock: rec = _agents.get(agent_id) if not rec: return rec.status = status rec.finished = datetime.now() rec.result = (result or "")[:_RESULT_PREVIEW_CHARS] logger.info("agent_manager: finished %s status=%s", agent_id[:8], status) if rec.notify and status != "cancelled": try: from notification import notify as _notify elapsed = int((rec.finished - rec.started).total_seconds()) emoji = "✅" if status == "done" else "⚠️" preview = (rec.result or "(no output)")[:200] msg = f"{emoji} Agent done [{rec.role}, {elapsed}s]: {preview}" await _notify(rec.user, msg) except Exception as e: logger.warning("agent_manager: notification failed for %s: %s", agent_id[:8], e) async def cancel_agent(agent_id: str, user: str) -> str: """Cancel a running background agent. Returns a human-readable status message.""" async with _lock: rec = _agents.get(agent_id) if not rec: return f"No agent found: {agent_id}" if rec.user != user: return "Access denied." if rec.status != "running": return f"Agent {agent_id[:8]}… is already {rec.status}." task_ref = rec._task_ref rec.status = "cancelled" rec.finished = datetime.now() if task_ref and not task_ref.done(): task_ref.cancel() logger.info("agent_manager: cancelled %s by user=%s", agent_id[:8], user) return f"Agent {agent_id[:8]}… cancelled." def get(agent_id: str) -> AgentRecord | None: """Look up an agent record by ID.""" return _agents.get(agent_id) def list_agents(user: str, status: str | None = None, limit: int = 10) -> list[AgentRecord]: """Return recent agents for a user, newest first. Does not acquire the lock — safe for read-only listing (Python dict iteration is thread-safe for reads; we don't care about racing with a concurrent registration). """ records = [r for r in _agents.values() if r.user == user] if status: records = [r for r in records if r.status == status] records.sort(key=lambda r: r.started, reverse=True) return records[:limit] def _prune_locked() -> None: """Remove completed agents older than _PRUNE_AFTER. Must be called inside _lock.""" cutoff = datetime.now() - _PRUNE_AFTER stale = [ aid for aid, r in _agents.items() if r.status != "running" and r.finished and r.finished < cutoff ] for aid in stale: del _agents[aid] if stale: logger.debug("agent_manager: pruned %d stale records", len(stale))