From 09d775b47bd99051e69ec41b4ce29555ddd8a755 Mon Sep 17 00:00:00 2001 From: Scott Idem Date: Fri, 8 May 2026 22:48:21 -0400 Subject: [PATCH] feat: spawn_agent tool + host max_concurrent + docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a synchronous sub-agent spawning tool that lets the orchestrator delegate tasks to a specific role's model and tool set. - cortex/tools/agents.py: spawn_agent(task, role, tier, timeout, max_rounds) - Supports local_openai and gemini_api model types - Per-host asyncio semaphore (keyed by host_id or model type) - asyncio.wait_for() enforces timeout; admin-only tool - cortex/model_registry.py: max_concurrent field in host schema (default 3, clamped 1-20); backfilled on _normalize() for existing hosts - cortex/routers/local_llm.py + local_llm.html: "Max parallel" number input in host add/edit forms - cortex/tools/__init__.py: spawn_agent registered in TOOL_CATEGORIES["Agents"], _CALLABLES, TOOL_ROLES (admin), and _ALL_DECLARATIONS - Docs: TOOLS.md count 44→45, spawn_agent section; HELP.md tool table updated; ARCH__FUTURE.md Round 2 completed items; TODO__Agents.md spawn_agent checked; CLAUDE.md tool count and list updated Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 6 +- cortex/model_registry.py | 23 ++-- cortex/routers/local_llm.py | 20 ++-- cortex/static/HELP.md | 9 +- cortex/static/TOOLS.md | 10 +- cortex/static/local_llm.html | 4 + cortex/tools/__init__.py | 6 + cortex/tools/agents.py | 204 ++++++++++++++++++++++++++++++++++ documentation/ARCH__FUTURE.md | 13 ++- documentation/TODO__Agents.md | 6 + 10 files changed, 275 insertions(+), 26 deletions(-) create mode 100644 cortex/tools/agents.py diff --git a/CLAUDE.md b/CLAUDE.md index e0878d6..c374b11 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -255,14 +255,14 @@ Cortex is running and stable. All channels are live: Active users: scott (inara), holly (tina), brian (wintermute) -**40 orchestrator tools:** web_search, http_fetch, -file_read/list/write, shell_exec, claude_allow_dir, +**45 orchestrator tools:** web_search, http_fetch, +file_read/list/write/session_search, shell_exec, claude_allow_dir, cortex_restart/logs/status/update, task_list/create/update/complete, cron_list/add/remove/toggle, reminders_add/list/remove/clear, scratch_read/write/append/clear, web_push, email_send, nc_talk_send, ae_journal_list/search/entries_list/entry_read/entry_create/entry_update/entry_disable/entry_append/entry_prepend, -ae_task_list. +ae_task_list, agent_notes_read/write/append/clear, spawn_agent. See `documentation/TODO__Agents.md` for the active task list. See `documentation/ROADMAP.md` for phases and what's next. diff --git a/cortex/model_registry.py b/cortex/model_registry.py index 04698ae..f859796 100644 --- a/cortex/model_registry.py +++ b/cortex/model_registry.py @@ -189,6 +189,7 @@ def _normalize(data: dict) -> dict: """Back-fill missing fields introduced by schema additions.""" for h in data.get("hosts", []): h.setdefault("host_type", "openwebui") + h.setdefault("max_concurrent", 3) data.setdefault("providers", _default_providers()) data["providers"].setdefault("anthropic", {"credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}]}) data["providers"].setdefault("google", {"accounts": []}) @@ -605,17 +606,20 @@ def remove_google_account(username: str, account_id: str) -> bool: def save_host(username: str, host_id: str | None, label: str, api_url: str, api_key: str, - host_type: str = "openwebui") -> str: + host_type: str = "openwebui", + max_concurrent: int = 3) -> str: """Create or update a host. Returns the host ID.""" data = _load(username) host_type = host_type if host_type in ("openwebui", "openai") else "openwebui" + max_concurrent = max(1, min(int(max_concurrent), 20)) if host_id: for h in data["hosts"]: if h["id"] == host_id: - h["label"] = label.strip() - h["api_url"] = api_url.strip() - h["host_type"] = host_type + h["label"] = label.strip() + h["api_url"] = api_url.strip() + h["host_type"] = host_type + h["max_concurrent"] = max_concurrent if api_key.strip(): h["api_key"] = api_key.strip() _save(username, data) @@ -624,11 +628,12 @@ def save_host(username: str, host_id: str | None, host_id = secrets.token_hex(4) data["hosts"].append({ - "id": host_id, - "label": label.strip(), - "api_url": api_url.strip(), - "api_key": api_key.strip(), - "host_type": host_type, + "id": host_id, + "label": label.strip(), + "api_url": api_url.strip(), + "api_key": api_key.strip(), + "host_type": host_type, + "max_concurrent": max_concurrent, }) _save(username, data) return host_id diff --git a/cortex/routers/local_llm.py b/cortex/routers/local_llm.py index e91a296..16e5652 100644 --- a/cortex/routers/local_llm.py +++ b/cortex/routers/local_llm.py @@ -114,6 +114,11 @@ def _render(username: str, success: str = "", error: str = "") -> str: +
+ + +
@@ -421,19 +426,20 @@ async def remove_google_account(request: Request, account_id: str): @router.post("/settings/local/host", include_in_schema=False) async def save_host( - request: Request, - host_id: str = Form(""), - label: str = Form(""), - api_url: str = Form(""), - api_key: str = Form(""), - host_type: str = Form("openwebui"), + request: Request, + host_id: str = Form(""), + label: str = Form(""), + api_url: str = Form(""), + api_key: str = Form(""), + host_type: str = Form("openwebui"), + max_concurrent: int = Form(3), ): username = _get_user(request) if not username: return RedirectResponse("/login", status_code=302) if not api_url.strip(): return HTMLResponse(_render(username, error="API URL is required.")) - reg.save_host(username, host_id or None, label, api_url, api_key, host_type) + reg.save_host(username, host_id or None, label, api_url, api_key, host_type, max_concurrent) return HTMLResponse(_render(username, success="Host saved.")) diff --git a/cortex/static/HELP.md b/cortex/static/HELP.md index 9a4a6fd..a7dc99e 100644 --- a/cortex/static/HELP.md +++ b/cortex/static/HELP.md @@ -82,12 +82,12 @@ Orchestrated sessions persist to history exactly like regular chat. ### Available Tools -40 tools across 11 categories. Each tool schema is sent to the model on every orchestrated call — fewer active tools means fewer tokens per call. +45 tools across 12 categories. Each tool schema is sent to the model on every orchestrated call — fewer active tools means fewer tokens per call. | Category | Tools | |---|---| | **Web** | `web_search`, `http_fetch` | -| **Files** | `file_read`, `file_list`, `file_write` | +| **Files** | `file_read`, `file_list`, `file_write`, `session_search` | | **Shell** | `shell_exec`, `claude_allow_dir` | | **System** | `cortex_restart`, `cortex_logs`, `cortex_status`, `cortex_update` | | **Tasks** | `task_list`, `task_create`, `task_update`, `task_complete` | @@ -97,8 +97,9 @@ Orchestrated sessions persist to history exactly like regular chat. | **Notifications** | `web_push`, `email_send`, `nc_talk_send` | | **Aether Journals** | `ae_journal_list/search`, `ae_journal_entries_list`, `ae_journal_entry_read/create/update/disable/append/prepend` | | **Agent Notes** | `agent_notes_read`, `agent_notes_write`, `agent_notes_append`, `agent_notes_clear` | +| **Agents** | `spawn_agent` | -File, Shell, System, and some Notification tools are **admin-only** and not visible to regular users. +File, Shell, System, Agents, and some Notification tools are **admin-only** and not visible to regular users. ### Per-Role Tool Sets @@ -277,6 +278,8 @@ Leave all slots empty to use the server default. **Per-role tool sets:** Expand any role card to configure which tool categories the orchestrator can use when that role is active. Unchecked categories are hidden from the model entirely — reducing token overhead on every orchestrated call. Leaving all categories unchecked means all tools the user has access to are available (the default). +**Inject timestamp:** Each role card has an "Inject current date & time into system prompt" checkbox (default on). Disable it for pure processing roles (summarizer, classifier, translator) that don't need clock awareness. + --- ## Nextcloud Talk Bot diff --git a/cortex/static/TOOLS.md b/cortex/static/TOOLS.md index 530dd5b..63bfd86 100644 --- a/cortex/static/TOOLS.md +++ b/cortex/static/TOOLS.md @@ -1,6 +1,6 @@ # Tool Reference -> This reference covers all 44 orchestrator tools available when the ⚡ toggle is on. +> This reference covers all 45 orchestrator tools available when the ⚡ toggle is on. > Tools are invoked automatically by the orchestrator — you don't call them directly. ¹ **Admin only** — requires the `admin` role. Invisible to regular users. @@ -113,3 +113,11 @@ Private, durable notes visible only to the orchestrator — not surfaced to user | `agent_notes_write` | Overwrite the notes file completely | | `agent_notes_append` | Append a timestamped entry (keeps last 3 backups automatically) | | `agent_notes_clear` | Erase all notes (backs up first) | + +## Agents ¹ + +Spawn sub-agents that run their own tool loop using a specific role's model and tools. + +| Tool | What it does | +|---|---| +| `spawn_agent` ¹ | Spawn a sub-agent synchronously — blocks until the task completes or times out. Params: `task`, `role` (default `chat`), `tier` (1–4, default 1), `timeout` seconds, `max_rounds` override. Only works with `local_openai` and `gemini_api` models. | diff --git a/cortex/static/local_llm.html b/cortex/static/local_llm.html index 52aa2a9..36c368c 100644 --- a/cortex/static/local_llm.html +++ b/cortex/static/local_llm.html @@ -403,6 +403,10 @@
+
+ + +
diff --git a/cortex/tools/__init__.py b/cortex/tools/__init__.py index 8022a3d..aa9ac00 100644 --- a/cortex/tools/__init__.py +++ b/cortex/tools/__init__.py @@ -70,6 +70,7 @@ from tools.agent_notes import ( agent_notes_append as _agent_notes_append, agent_notes_clear as _agent_notes_clear, ) +from tools.agents import spawn_agent as _spawn_agent # ── Declaration imports ─────────────────────────────────────────────────────── @@ -84,6 +85,7 @@ import tools.reminders as _mod_reminders import tools.scratch as _mod_scratch import tools.notify as _mod_notify import tools.agent_notes as _mod_agent_notes +import tools.agents as _mod_agents # ── Tool categories — used by the Model Registry UI for grouped checkboxes ─── @@ -106,6 +108,7 @@ TOOL_CATEGORIES: dict[str, list[str]] = { ], "Aether Tasks": ["ae_task_list"], "Agent Notes": ["agent_notes_read", "agent_notes_write", "agent_notes_append", "agent_notes_clear"], + "Agents": ["spawn_agent"], } # ── Callable registry ───────────────────────────────────────────────────────── @@ -156,6 +159,7 @@ _CALLABLES: dict[str, callable] = { "agent_notes_write": _agent_notes_write, "agent_notes_append": _agent_notes_append, "agent_notes_clear": _agent_notes_clear, + "spawn_agent": _spawn_agent, } # ── Role-based access control ───────────────────────────────────────────────── @@ -172,6 +176,7 @@ TOOL_ROLES: dict[str, str] = { "file_list": "admin", "file_write": "admin", "ae_task_list": "admin", + "spawn_agent": "admin", "email_send": "admin", "nc_talk_send": "admin", } @@ -208,6 +213,7 @@ _ALL_DECLARATIONS: list[types.FunctionDeclaration] = ( + _mod_ae_knowledge.DECLARATIONS + _mod_ae_tasks.DECLARATIONS + _mod_agent_notes.DECLARATIONS + + _mod_agents.DECLARATIONS ) # Full Gemini Tool object (all tools — use get_tools_for_role() in production) diff --git a/cortex/tools/agents.py b/cortex/tools/agents.py new file mode 100644 index 0000000..c932c50 --- /dev/null +++ b/cortex/tools/agents.py @@ -0,0 +1,204 @@ +""" +Agent spawning tool — lets the orchestrator launch sub-agents synchronously. + +Sub-agents run using the model assigned to the specified role. The call blocks +until the sub-agent completes or times out. + +Supported model types: local_openai, gemini_api. +claude_cli / gemini_cli are chat-only and do not support sub-agent tool loops. +""" + +import asyncio +import logging + +from google.genai import types + +logger = logging.getLogger(__name__) + +# Per-host semaphores — keyed by "host:" or "type:" +# Created lazily on first use; never deleted (module-level singletons) +_semaphores: dict[str, asyncio.Semaphore] = {} +_sem_lock = asyncio.Lock() + + +async def _get_semaphore(key: str, max_concurrent: int) -> asyncio.Semaphore: + """Return (or create) the semaphore for a given host/type key.""" + async with _sem_lock: + if key not in _semaphores: + _semaphores[key] = asyncio.Semaphore(max_concurrent) + return _semaphores[key] + + +async def spawn_agent( + task: str, + role: str = "chat", + tier: int = 1, + timeout: int = 120, + max_rounds: int | None = None, +) -> str: + """ + Spawn a sub-agent to complete a task synchronously. + + The sub-agent uses the model and tools assigned to the given role. Returns + the sub-agent's response as a string. + """ + import model_registry + from context_loader import load_context + from auth_utils import get_user_role, get_tool_policy + from persona import get_user + + user = get_user() or "scott" + + role_cfg = model_registry.get_role_config(user, role) + model_cfg = model_registry.get_model_for_role(user, role) + + if not model_cfg: + return f"spawn_agent: no model configured for role '{role}'" + + model_type = model_cfg.get("type", "unknown") + + if model_type not in ("local_openai", "gemini_api"): + return ( + f"spawn_agent: model type '{model_type}' does not support tool-enabled sub-agents. " + f"Assign a local_openai or gemini_api model to role '{role}'." + ) + + # Determine concurrency key and semaphore limit + host_id = model_cfg.get("host_id") + if host_id: + registry = model_registry.get_registry(user) + host = next((h for h in registry.get("hosts", []) if h["id"] == host_id), None) + max_concurrent = (host or {}).get("max_concurrent", 3) + sem_key = f"host:{host_id}" + else: + max_concurrent = 5 if model_type == "gemini_api" else 3 + sem_key = f"type:{model_type}" + + sem = await _get_semaphore(sem_key, max_concurrent) + + system_prompt = load_context( + tier=tier, + include_long=(tier >= 2), + include_mid=(tier >= 2), + include_short=(tier >= 2), + role_append=role_cfg.get("system_append", ""), + inject_datetime=role_cfg.get("inject_datetime", True), + ) + + user_role = get_user_role(user) + tool_list = role_cfg.get("tools") + policy = get_tool_policy(user) + confirm_allow = set(policy.get("allow", [])) + confirm_deny = set(policy.get("deny", [])) + + if max_rounds is not None: + model_cfg = dict(model_cfg) + model_cfg["max_rounds"] = max_rounds + + async def _run() -> str: + if model_type == "local_openai": + import openai_orchestrator + result = await openai_orchestrator.run( + task=task, + system_prompt=system_prompt, + model_cfg=model_cfg, + respond_with_final=True, + user_role=user_role, + tool_list=tool_list, + confirm_allow=confirm_allow, + confirm_deny=confirm_deny, + ) + if result.checkpoint: + return ( + "Sub-agent requires user confirmation — " + "confirmation gates are not supported inside spawn_agent. " + "Pre-allow the tool in the user's tool policy or use a different role." + ) + return result.response or "(sub-agent returned no output)" + + # gemini_api + import orchestrator_engine + from auth_utils import get_user_gemini_key + gemini_key = model_cfg.get("api_key") or get_user_gemini_key(user) + result = await orchestrator_engine.run( + task=task, + system_prompt=system_prompt, + session_messages=None, + respond_with_claude=True, + gemini_api_key=gemini_key, + model_name=model_cfg.get("model_name"), + response_role=role, + user_role=user_role, + tool_list=tool_list, + confirm_allow=confirm_allow, + confirm_deny=confirm_deny, + ) + if result.checkpoint: + return ( + "Sub-agent requires user confirmation — " + "confirmation gates are not supported inside spawn_agent." + ) + return result.response or "(sub-agent returned no output)" + + async with sem: + try: + logger.info( + "spawn_agent: role=%s tier=%d timeout=%ds task=%.80s", + role, tier, timeout, task, + ) + response = await asyncio.wait_for(_run(), timeout=float(timeout)) + logger.info("spawn_agent: done role=%s response=%d chars", role, len(response)) + return response + except asyncio.TimeoutError: + logger.warning("spawn_agent: timed out after %ds role=%s", timeout, role) + return f"Sub-agent timed out after {timeout}s (role={role})" + except Exception as e: + logger.exception("spawn_agent: failed role=%s", role) + return f"Sub-agent error ({role}): {e}" + + +DECLARATIONS = [ + types.FunctionDeclaration( + name="spawn_agent", + description=( + "Spawn a sub-agent to complete a task synchronously. " + "The sub-agent uses the model and tool set assigned to the given role. " + "Use for processing pipelines, parallel analysis, or delegating " + "specialized work (research, coding, data migration, etc.)." + ), + parameters=types.Schema( + type=types.Type.OBJECT, + properties={ + "task": types.Schema( + type=types.Type.STRING, + description="The complete task description for the sub-agent.", + ), + "role": types.Schema( + type=types.Type.STRING, + description=( + "Role determining the model and tools. " + "E.g. 'research' for web lookups, 'coder' for code tasks, " + "'distill' for summarization. Defaults to 'chat'." + ), + ), + "tier": types.Schema( + type=types.Type.INTEGER, + description=( + "Context tier: 1 = minimal (fast, identity only), " + "2 = standard (+ memory), 3 = + last 2 session logs. " + "Use 1 for pure processing tasks." + ), + ), + "timeout": types.Schema( + type=types.Type.INTEGER, + description="Max seconds to wait (default 120).", + ), + "max_rounds": types.Schema( + type=types.Type.INTEGER, + description="Override max tool-loop iterations for this call.", + ), + }, + required=["task"], + ), + ) +] diff --git a/documentation/ARCH__FUTURE.md b/documentation/ARCH__FUTURE.md index 919a488..e0fee82 100644 --- a/documentation/ARCH__FUTURE.md +++ b/documentation/ARCH__FUTURE.md @@ -46,7 +46,7 @@ Full API reference: [`docs/OPEN_WEBUI_API.md`](../docs/OPEN_WEBUI_API.md) ## 2. Orchestrator Tool Expansions -**Status:** Ongoing. Current tool count: 44. Previously planned tools are all complete. +**Status:** Ongoing. Current tool count: 45. Previously planned tools are all complete. ### Completed All originally planned tools are live: `cortex_restart`, `cortex_logs`, `http_fetch`, @@ -58,10 +58,17 @@ All originally planned tools are live: `cortex_restart`, `cortex_logs`, `http_fe via `context_loader.py` (`--- System --- Current date and time: ...`). A dedicated `datetime_now` tool is not needed — the timestamp is always in context. +### Completed Round 2 +| Tool | Notes | +|---|---| +| `session_search` | `tools/files.py` — full-text grep across session logs; params: `query`, `limit` (max 20); own sessions only via ContextVars. 2026-05-08 | +| `reminders due dates` | `tools/reminders.py` — optional `due: YYYY-MM-DD` on `reminders_add`; `load_due_reminders()` suppresses future-dated entries from context. 2026-05-08 | +| `spawn_agent` | `tools/agents.py` — sync sub-agent via role model; semaphore per host (`max_concurrent` in host schema); `asyncio.wait_for` timeout; admin-only. 2026-05-08 | + +### Remaining Round 2 + | Tool | Module | Priority | Description | |---|---|---|---| -| `session_search` | new `search.py` or `files.py` | High | Full-text search across past session logs. The UI search already exists (`GET /sessions/search`) — this exposes it to the orchestrator so the agent can answer "what did we discuss about X last month?" | -| `reminders due dates` | `reminders.py` | Medium | Add optional `due` field to `reminders_add`. Surface only due/overdue reminders in context rather than the full flat list. Makes reminders time-aware rather than always-on noise. | | `http_post` | `web.py` | Medium | POST to an external URL — for webhooks, REST APIs, form submissions. Requires a per-user host allowlist (same pattern as `email_send`) to prevent misuse. | | `nc_talk_history` | `notify.py` | Medium | Read recent messages from a Nextcloud Talk conversation. The bot can send but cannot read — adding read capability gives it full context before replying. | | `task_list` priority filter | `tasks.py` | Low | `task_list` accepts `status` but not `priority`. Add `priority` param so the agent can ask "what are my high-priority tasks?" without returning everything. | diff --git a/documentation/TODO__Agents.md b/documentation/TODO__Agents.md index 3735b8c..587a4a5 100644 --- a/documentation/TODO__Agents.md +++ b/documentation/TODO__Agents.md @@ -81,6 +81,12 @@ system prompt by `context_loader.py` at all tiers. - `context_loader.py` calls `load_due_reminders()` — future-dated sections suppressed until due - `reminders_list` shows `[OVERDUE]`, `[due TODAY]`, or `[due: YYYY-MM-DD]` per entry - Backward compatible — existing undated reminders always surface as before +- [x] **`spawn_agent`** — spawn a synchronous sub-agent using any role's model + tools — 2026-05-08 + - `cortex/tools/agents.py` — `spawn_agent(task, role, tier, timeout, max_rounds)` + - Per-host asyncio semaphore keyed by `host_id` (or model type for cloud); `max_concurrent` field in host schema + - Supports `local_openai` and `gemini_api` model types; returns error string for others + - Admin-only tool (powerful — can spawn arbitrarily long sub-tasks) + - Host UI: "Max parallel" number input in host edit/add forms - [ ] **`http_post`** — POST to external URLs - Params: `url: str`, `body: dict | str`, `headers: dict | None` - Per-user host allowlist in `home/{user}/http_allowlist.json` (same pattern as email)