agent_manager.py (new):
- AgentRecord dataclass: agent_id, level (1/2/3), role, task, status, started, parent_id (lineage), finished, result, notify, _task_ref
- register() / finish() / cancel_agent() / list_agents() / get() / set_task_ref()
- Calls notification.notify() on completion when notify=True (same channel as reminders and cron completions)
- 24-hour pruning of completed records on each new registration

spawn_agent (tools/agents.py):
- background=True: fires asyncio.create_task(), registers in agent_manager, returns agent_id string immediately — sync path unchanged (no regression)
- notify=True: push/Talk notification when the background task completes
- Level enforcement: _agent_level param tracks hierarchy depth; when spawning from Level 2, child automatically gets spawn_agent + aider_run denied so Level 3 agents cannot delegate further

New lifecycle tools (tools/agents.py + __init__.py):
- agent_status(agent_id) — status, role, level, elapsed, task, result preview; user-level
- agent_list(status, limit) — all agents for current user, newest first; user-level
- agent_cancel(agent_id) — kills background task; admin-only, confirm-required

tests/test_agent_manager.py (new, 41 tests):
- agent_manager CRUD, pruning, notification hook
- spawn_agent background: returns immediately, completes async, timeout, failure
- Level enforcement: L1→L2 permits spawn, L2→L3 auto-denies; explicit tool_list path
- agent_status / agent_list / agent_cancel output formatting
- aider_run background: returns agent_id, completes async, sync path unchanged
- All tests run without browser or Cortex service (~2.5s total)

Run: cd cortex && .venv/bin/python -m pytest tests/test_agent_manager.py -v

Docs: ARCH__FUTURE.md §13 (full design), ROADMAP.md, TODO__Agents.md, MASTER.md, HELP.md (orchestrator description corrected, tool schema line updated to reflect keyword routing), CLAUDE.md tool count 66→69.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
159 lines
5.3 KiB
Python
159 lines
5.3 KiB
Python
"""
|
|
Agent lifecycle manager — registry for background spawn_agent and aider_run tasks.
|
|
|
|
Tracks running and recently completed agents in-process. On completion, fires
|
|
notification.notify() if notify=True (same channel used by reminders and cron jobs).
|
|
|
|
Records are kept for 24 hours after completion, then pruned on next registration.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta
|
|
|
|
# Module-scoped logger; messages are emitted under this module's import name.
logger = logging.getLogger(__name__)

# Minimum age of a finished record before the pruning pass may drop it.
_PRUNE_AFTER = timedelta(hours=24)
# Maximum number of characters of an agent's result kept on its record.
_RESULT_PREVIEW_CHARS = 500
# Maximum number of characters of the task text kept on a record.
_TASK_PREVIEW_CHARS = 200
|
|
|
|
|
|
@dataclass
|
|
class AgentRecord:
|
|
agent_id: str
|
|
level: int # 1 = persona, 2 = specialized sub-agent, 3 = support agent
|
|
role: str # e.g. "coder", "research", "chat"
|
|
task: str # first _TASK_PREVIEW_CHARS of the task
|
|
status: str # running / done / failed / cancelled / timeout
|
|
started: datetime
|
|
user: str
|
|
parent_id: str | None = None # agent_id of the spawner (lineage tracking)
|
|
finished: datetime | None = None
|
|
result: str | None = None # first _RESULT_PREVIEW_CHARS on completion
|
|
notify: bool = False # push notification on completion
|
|
_task_ref: "asyncio.Task | None" = field(default=None, repr=False)
|
|
|
|
|
|
# Registry of known agents, keyed by agent_id. It lives only in this process:
# nothing is persisted, so a restart starts from an empty registry.
_agents: dict[str, AgentRecord] = {}
# Held by the coroutines in this module while they add or update records.
_lock = asyncio.Lock()
|
|
|
|
|
|
async def register(
    user: str,
    role: str,
    task: str,
    level: int = 2,
    parent_id: str | None = None,
    notify: bool = False,
) -> AgentRecord:
    """Add a new agent in the "running" state to the registry.

    Args:
        user: user the agent is recorded under.
        role: role label stored on the record.
        task: task text; only its first _TASK_PREVIEW_CHARS characters are kept.
        level: hierarchy level stored on the record (defaults to 2).
        parent_id: agent_id of the spawning agent, if any.
        notify: stored on the record; finish() reads it to decide whether to
            send a completion notification.

    Returns:
        The newly created record, carrying a freshly generated ``agent_id``.
    """
    record = AgentRecord(
        agent_id=str(uuid.uuid4()),
        level=level,
        role=role,
        task=task[:_TASK_PREVIEW_CHARS],
        status="running",
        started=datetime.now(),
        user=user,
        parent_id=parent_id,
        notify=notify,
    )

    async with _lock:
        # Registration doubles as the housekeeping trigger: expired records
        # are dropped before the new one is inserted.
        _prune_locked()
        _agents[record.agent_id] = record

    logger.info(
        "agent_manager: registered %s role=%s level=%d user=%s task=%.60s",
        record.agent_id[:8], role, level, user, task,
    )
    return record
|
|
|
|
|
|
def set_task_ref(agent_id: str, task_ref: "asyncio.Task") -> None:
    """Attach the backing asyncio.Task to a registered agent.

    The stored handle is what cancel_agent() later cancels. Call this right
    after asyncio.create_task(), before the event loop gets a chance to yield.

    An unknown ``agent_id`` is ignored without raising.
    """
    record = _agents.get(agent_id)
    if record is None:
        return
    record._task_ref = task_ref
|
|
|
|
|
|
async def finish(agent_id: str, result: str, status: str = "done") -> None:
    """Record an agent's final state and, if requested, notify its user.

    Args:
        agent_id: id of the agent that ended; unknown ids are ignored.
        result: agent output; empty/None is stored as "" and anything longer
            than _RESULT_PREVIEW_CHARS is truncated.
        status: final status to store (defaults to "done").

    A notification is sent only when the record has ``notify`` set and the
    final status is not "cancelled". Notification failures are logged and
    never propagate to the caller.
    """
    async with _lock:
        record = _agents.get(agent_id)
        if not record:
            return
        record.status = status
        record.finished = datetime.now()
        record.result = (result or "")[:_RESULT_PREVIEW_CHARS]

    logger.info("agent_manager: finished %s status=%s", agent_id[:8], status)

    if not record.notify or status == "cancelled":
        return

    try:
        # Imported inside the try so that an import failure is handled the
        # same way as a delivery failure: logged, not raised.
        from notification import notify as _notify

        elapsed = int((record.finished - record.started).total_seconds())
        if status == "done":
            emoji = "✅"
        else:
            emoji = "⚠️"
        preview = (record.result or "(no output)")[:200]
        msg = f"{emoji} Agent done [{record.role}, {elapsed}s]: {preview}"
        await _notify(record.user, msg)
    except Exception as e:
        logger.warning("agent_manager: notification failed for %s: %s", agent_id[:8], e)
|
|
|
|
|
|
async def cancel_agent(agent_id: str, user: str) -> str:
    """Cancel a running agent on behalf of ``user``.

    Args:
        agent_id: id of the agent to cancel.
        user: requesting user; must equal the ``user`` stored on the record.

    Returns:
        A human-readable message: not found, access denied, already in a
        non-running state, or cancelled.
    """
    async with _lock:
        record = _agents.get(agent_id)
        if not record:
            return f"No agent found: {agent_id}"
        if record.user != user:
            return "Access denied."

        short_id = agent_id[:8]
        if record.status != "running":
            return f"Agent {short_id}… is already {record.status}."

        # Mark the record first; the task itself is cancelled after the lock
        # has been released.
        handle = record._task_ref
        record.status = "cancelled"
        record.finished = datetime.now()

    if handle and not handle.done():
        handle.cancel()

    logger.info("agent_manager: cancelled %s by user=%s", short_id, user)
    return f"Agent {short_id}… cancelled."
|
|
|
|
|
|
def get(agent_id: str) -> AgentRecord | None:
    """Return the record registered under ``agent_id``, or None if unknown."""
    try:
        return _agents[agent_id]
    except KeyError:
        return None
|
|
|
|
|
|
def list_agents(user: str, status: str | None = None, limit: int = 10) -> list[AgentRecord]:
    """Return the most recently started agents recorded for ``user``.

    Args:
        user: only records whose ``user`` field equals this value are returned.
        status: when truthy, additionally require an exact ``status`` match;
            None (or an empty string) disables the status filter.
        limit: maximum number of records to return. Zero or a negative value
            yields an empty list.

    Returns:
        Matching records ordered by ``started``, newest first.

    The registry lock is not acquired. The function contains no ``await``, so
    coroutines on the same event loop cannot interleave with it. The values
    are also copied into a list before filtering, so the live dict is never
    iterated while the per-record comparisons run.
    """
    # A negative slice bound would silently drop the *oldest* entries instead
    # of returning nothing, so clamp it.
    if limit is not None and limit < 0:
        limit = 0

    # Copy first: iterating a dict that is resized mid-iteration raises
    # RuntimeError.
    snapshot = list(_agents.values())

    matches = [
        rec for rec in snapshot
        if rec.user == user and (not status or rec.status == status)
    ]
    matches.sort(key=lambda rec: rec.started, reverse=True)
    return matches[:limit]
|
|
|
|
|
|
def _prune_locked() -> None:
    """Drop finished records whose finish time is older than _PRUNE_AFTER.

    Records still "running", or without a ``finished`` timestamp, are never
    removed. The caller must already hold ``_lock``.
    """
    threshold = datetime.now() - _PRUNE_AFTER

    def _expired(record: AgentRecord) -> bool:
        # Only completed records with a finish time before the threshold.
        if record.status == "running" or not record.finished:
            return False
        return record.finished < threshold

    # Collect ids first, then delete, so the dict is not resized while it is
    # being iterated.
    expired_ids = [agent_id for agent_id, record in _agents.items() if _expired(record)]
    for agent_id in expired_ids:
        del _agents[agent_id]

    if expired_ids:
        logger.debug("agent_manager: pruned %d stale records", len(expired_ids))
|