From 09d775b47bd99051e69ec41b4ce29555ddd8a755 Mon Sep 17 00:00:00 2001
From: Scott Idem <stidem@gmail.com>
Date: Fri, 8 May 2026 22:48:21 -0400
Subject: [PATCH] feat: spawn_agent tool + host max_concurrent + docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a synchronous sub-agent spawning tool that lets the orchestrator
delegate tasks to a specific role's model and tool set.

- cortex/tools/agents.py: spawn_agent(task, role, tier, timeout, max_rounds)
  - Supports local_openai and gemini_api model types
  - Per-host asyncio semaphore (keyed by host_id or model type)
  - asyncio.wait_for() enforces timeout; admin-only tool
- cortex/model_registry.py: max_concurrent field in host schema (default 3,
  clamped 1-20); backfilled on _normalize() for existing hosts
- cortex/routers/local_llm.py + local_llm.html: "Max parallel" number input
  in host add/edit forms
- cortex/tools/__init__.py: spawn_agent registered in TOOL_CATEGORIES["Agents"],
  _CALLABLES, TOOL_ROLES (admin), and _ALL_DECLARATIONS
- Docs: TOOLS.md count 44→45, spawn_agent section; HELP.md tool table updated;
  ARCH__FUTURE.md Round 2 completed items; TODO__Agents.md spawn_agent checked;
  CLAUDE.md tool count and list updated

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CLAUDE.md                     |   6 +-
 cortex/model_registry.py      |  23 ++--
 cortex/routers/local_llm.py   |  20 ++--
 cortex/static/HELP.md         |   9 +-
 cortex/static/TOOLS.md        |  10 +-
 cortex/static/local_llm.html  |   4 +
 cortex/tools/__init__.py      |   6 +
 cortex/tools/agents.py        | 204 ++++++++++++++++++++++++++++++++++
 documentation/ARCH__FUTURE.md |  13 ++-
 documentation/TODO__Agents.md |   6 +
 10 files changed, 275 insertions(+), 26 deletions(-)
 create mode 100644 cortex/tools/agents.py

diff --git a/CLAUDE.md b/CLAUDE.md
index e0878d6..c374b11 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -255,14 +255,14 @@ Cortex is running and stable. All channels are live:
 
 Active users: scott (inara), holly (tina), brian (wintermute)
 
-**40 orchestrator tools:** web_search, http_fetch,
-file_read/list/write, shell_exec, claude_allow_dir,
+**45 orchestrator tools:** web_search, http_fetch,
+file_read/list/write/session_search, shell_exec, claude_allow_dir,
 cortex_restart/logs/status/update,
 task_list/create/update/complete, cron_list/add/remove/toggle,
 reminders_add/list/remove/clear, scratch_read/write/append/clear,
 web_push, email_send, nc_talk_send,
 ae_journal_list/search/entries_list/entry_read/entry_create/entry_update/entry_disable/entry_append/entry_prepend,
-ae_task_list.
+ae_task_list, agent_notes_read/write/append/clear, spawn_agent.
 
 See `documentation/TODO__Agents.md` for the active task list.
 See `documentation/ROADMAP.md` for phases and what's next.
diff --git a/cortex/model_registry.py b/cortex/model_registry.py
index 04698ae..f859796 100644
--- a/cortex/model_registry.py
+++ b/cortex/model_registry.py
@@ -189,6 +189,7 @@ def _normalize(data: dict) -> dict:
     """Back-fill missing fields introduced by schema additions."""
     for h in data.get("hosts", []):
         h.setdefault("host_type", "openwebui")
+        h.setdefault("max_concurrent", 3)
     data.setdefault("providers", _default_providers())
     data["providers"].setdefault("anthropic", {"credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}]})
     data["providers"].setdefault("google", {"accounts": []})
@@ -605,17 +606,20 @@ def remove_google_account(username: str, account_id: str) -> bool:
 
 def save_host(username: str, host_id: str | None,
               label: str, api_url: str, api_key: str,
-              host_type: str = "openwebui") -> str:
+              host_type: str = "openwebui",
+              max_concurrent: int = 3) -> str:
     """Create or update a host. Returns the host ID."""
     data = _load(username)
     host_type = host_type if host_type in ("openwebui", "openai") else "openwebui"
+    max_concurrent = max(1, min(int(max_concurrent), 20))
 
     if host_id:
         for h in data["hosts"]:
             if h["id"] == host_id:
-                h["label"]     = label.strip()
-                h["api_url"]   = api_url.strip()
-                h["host_type"] = host_type
+                h["label"]          = label.strip()
+                h["api_url"]        = api_url.strip()
+                h["host_type"]      = host_type
+                h["max_concurrent"] = max_concurrent
                 if api_key.strip():
                     h["api_key"] = api_key.strip()
                 _save(username, data)
@@ -624,11 +628,12 @@ def save_host(username: str, host_id: str | None,
 
     host_id = secrets.token_hex(4)
     data["hosts"].append({
-        "id":        host_id,
-        "label":     label.strip(),
-        "api_url":   api_url.strip(),
-        "api_key":   api_key.strip(),
-        "host_type": host_type,
+        "id":            host_id,
+        "label":         label.strip(),
+        "api_url":       api_url.strip(),
+        "api_key":       api_key.strip(),
+        "host_type":     host_type,
+        "max_concurrent": max_concurrent,
     })
     _save(username, data)
     return host_id
diff --git a/cortex/routers/local_llm.py b/cortex/routers/local_llm.py
index e91a296..16e5652 100644
--- a/cortex/routers/local_llm.py
+++ b/cortex/routers/local_llm.py
@@ -114,6 +114,11 @@ def _render(username: str, success: str = "", error: str = "") -> str:
                   <option value="openai"{ai}>OpenAI-compatible (OpenRouter, etc.)</option>
                 </select>
               </div>
+              <div class="field" style="flex:0 0 auto; width:6rem">
+                <label>Max parallel</label>
+                <input type="number" name="max_concurrent" min="1" max="20"
+                       value="{h.get('max_concurrent', 3)}" style="width:100%">
+              </div>
             </div>
             <div class="btn-row">
               <button type="submit" class="btn btn-secondary btn-sm">Save</button>
@@ -421,19 +426,20 @@ async def remove_google_account(request: Request, account_id: str):
 
 @router.post("/settings/local/host", include_in_schema=False)
 async def save_host(
-    request:   Request,
-    host_id:   str = Form(""),
-    label:     str = Form(""),
-    api_url:   str = Form(""),
-    api_key:   str = Form(""),
-    host_type: str = Form("openwebui"),
+    request:        Request,
+    host_id:        str = Form(""),
+    label:          str = Form(""),
+    api_url:        str = Form(""),
+    api_key:        str = Form(""),
+    host_type:      str = Form("openwebui"),
+    max_concurrent: int = Form(3),
 ):
     username = _get_user(request)
     if not username:
         return RedirectResponse("/login", status_code=302)
     if not api_url.strip():
         return HTMLResponse(_render(username, error="API URL is required."))
-    reg.save_host(username, host_id or None, label, api_url, api_key, host_type)
+    reg.save_host(username, host_id or None, label, api_url, api_key, host_type, max_concurrent)
     return HTMLResponse(_render(username, success="Host saved."))
 
 
diff --git a/cortex/static/HELP.md b/cortex/static/HELP.md
index 9a4a6fd..a7dc99e 100644
--- a/cortex/static/HELP.md
+++ b/cortex/static/HELP.md
@@ -82,12 +82,12 @@ Orchestrated sessions persist to history exactly like regular chat.
 
 ### Available Tools
 
-40 tools across 11 categories. Each tool schema is sent to the model on every orchestrated call — fewer active tools means fewer tokens per call.
+45 tools across 12 categories. Each tool schema is sent to the model on every orchestrated call — fewer active tools means fewer tokens per call.
 
 | Category | Tools |
 |---|---|
 | **Web** | `web_search`, `http_fetch` |
-| **Files** | `file_read`, `file_list`, `file_write` |
+| **Files** | `file_read`, `file_list`, `file_write`, `session_search` |
 | **Shell** | `shell_exec`, `claude_allow_dir` |
 | **System** | `cortex_restart`, `cortex_logs`, `cortex_status`, `cortex_update` |
 | **Tasks** | `task_list`, `task_create`, `task_update`, `task_complete` |
@@ -97,8 +97,9 @@ Orchestrated sessions persist to history exactly like regular chat.
 | **Notifications** | `web_push`, `email_send`, `nc_talk_send` |
 | **Aether Journals** | `ae_journal_list/search`, `ae_journal_entries_list`, `ae_journal_entry_read/create/update/disable/append/prepend` |
 | **Agent Notes** | `agent_notes_read`, `agent_notes_write`, `agent_notes_append`, `agent_notes_clear` |
+| **Agents** | `spawn_agent` |
 
-File, Shell, System, and some Notification tools are **admin-only** and not visible to regular users.
+File, Shell, System, Agents, and some Notification tools are **admin-only** and not visible to regular users.
 
 ### Per-Role Tool Sets
 
@@ -277,6 +278,8 @@ Leave all slots empty to use the server default.
 
 **Per-role tool sets:** Expand any role card to configure which tool categories the orchestrator can use when that role is active. Unchecked categories are hidden from the model entirely — reducing token overhead on every orchestrated call. Leaving all categories unchecked means all tools the user has access to are available (the default).
 
+**Inject timestamp:** Each role card has an "Inject current date & time into system prompt" checkbox (default on). Disable it for pure processing roles (summarizer, classifier, translator) that don't need clock awareness.
+
 ---
 
 ## Nextcloud Talk Bot
diff --git a/cortex/static/TOOLS.md b/cortex/static/TOOLS.md
index 530dd5b..63bfd86 100644
--- a/cortex/static/TOOLS.md
+++ b/cortex/static/TOOLS.md
@@ -1,6 +1,6 @@
 # Tool Reference
 
-> This reference covers all 44 orchestrator tools available when the ⚡ toggle is on.
+> This reference covers all 45 orchestrator tools available when the ⚡ toggle is on.
 > Tools are invoked automatically by the orchestrator — you don't call them directly.
 
 ¹ **Admin only** — requires the `admin` role. Invisible to regular users.  
@@ -113,3 +113,11 @@ Private, durable notes visible only to the orchestrator — not surfaced to user
 | `agent_notes_write` | Overwrite the notes file completely |
 | `agent_notes_append` | Append a timestamped entry (keeps last 3 backups automatically) |
 | `agent_notes_clear` | Erase all notes (backs up first) |
+
+## Agents ¹
+
+Spawn sub-agents that run their own tool loop using a specific role's model and tools.
+
+| Tool | What it does |
+|---|---|
+| `spawn_agent` ¹ | Spawn a sub-agent synchronously — blocks until the task completes or times out. Params: `task`, `role` (default `chat`), `tier` (1–4, default 1), `timeout` seconds, `max_rounds` override. Only works with `local_openai` and `gemini_api` models. |
diff --git a/cortex/static/local_llm.html b/cortex/static/local_llm.html
index 52aa2a9..36c368c 100644
--- a/cortex/static/local_llm.html
+++ b/cortex/static/local_llm.html
@@ -403,6 +403,10 @@
                   <option value="openai">OpenAI-compatible (OpenRouter, etc.)</option>
                 </select>
               </div>
+              <div class="field" style="flex:0 0 auto; width:6rem">
+                <label>Max parallel</label>
+                <input type="number" name="max_concurrent" min="1" max="20" value="3" style="width:100%">
+              </div>
             </div>
             <div class="btn-row">
               <button type="submit" class="btn btn-primary btn-sm">Add Host</button>
diff --git a/cortex/tools/__init__.py b/cortex/tools/__init__.py
index 8022a3d..aa9ac00 100644
--- a/cortex/tools/__init__.py
+++ b/cortex/tools/__init__.py
@@ -70,6 +70,7 @@ from tools.agent_notes import (
     agent_notes_append as _agent_notes_append,
     agent_notes_clear  as _agent_notes_clear,
 )
+from tools.agents import spawn_agent as _spawn_agent
 
 # ── Declaration imports ───────────────────────────────────────────────────────
 
@@ -84,6 +85,7 @@ import tools.reminders    as _mod_reminders
 import tools.scratch      as _mod_scratch
 import tools.notify       as _mod_notify
 import tools.agent_notes  as _mod_agent_notes
+import tools.agents       as _mod_agents
 
 # ── Tool categories — used by the Model Registry UI for grouped checkboxes ───
 
@@ -106,6 +108,7 @@ TOOL_CATEGORIES: dict[str, list[str]] = {
     ],
     "Aether Tasks":     ["ae_task_list"],
     "Agent Notes":      ["agent_notes_read", "agent_notes_write", "agent_notes_append", "agent_notes_clear"],
+    "Agents":           ["spawn_agent"],
 }
 
 # ── Callable registry ─────────────────────────────────────────────────────────
@@ -156,6 +159,7 @@ _CALLABLES: dict[str, callable] = {
     "agent_notes_write":         _agent_notes_write,
     "agent_notes_append":        _agent_notes_append,
     "agent_notes_clear":         _agent_notes_clear,
+    "spawn_agent":               _spawn_agent,
 }
 
 # ── Role-based access control ─────────────────────────────────────────────────
@@ -172,6 +176,7 @@ TOOL_ROLES: dict[str, str] = {
     "file_list":        "admin",
     "file_write":       "admin",
     "ae_task_list":     "admin",
+    "spawn_agent":      "admin",
     "email_send":       "admin",
     "nc_talk_send":     "admin",
 }
@@ -208,6 +213,7 @@ _ALL_DECLARATIONS: list[types.FunctionDeclaration] = (
     + _mod_ae_knowledge.DECLARATIONS
     + _mod_ae_tasks.DECLARATIONS
     + _mod_agent_notes.DECLARATIONS
+    + _mod_agents.DECLARATIONS
 )
 
 # Full Gemini Tool object (all tools — use get_tools_for_role() in production)
diff --git a/cortex/tools/agents.py b/cortex/tools/agents.py
new file mode 100644
index 0000000..c932c50
--- /dev/null
+++ b/cortex/tools/agents.py
@@ -0,0 +1,204 @@
+"""
+Agent spawning tool — lets the orchestrator launch sub-agents synchronously.
+
+Sub-agents run using the model assigned to the specified role. The call blocks
+until the sub-agent completes or times out.
+
+Supported model types: local_openai, gemini_api.
+claude_cli / gemini_cli are chat-only and do not support sub-agent tool loops.
+"""
+
+import asyncio
+import logging
+
+from google.genai import types
+
+logger = logging.getLogger(__name__)
+
+# Per-host semaphores — keyed by "host:<host_id>" or "type:<model_type>"
+# Created lazily on first use; never deleted (module-level singletons)
+_semaphores: dict[str, asyncio.Semaphore] = {}
+_sem_lock = asyncio.Lock()
+
+
+async def _get_semaphore(key: str, max_concurrent: int) -> asyncio.Semaphore:
+    """Return (or create) the semaphore for a given host/type key."""
+    async with _sem_lock:
+        if key not in _semaphores:
+            _semaphores[key] = asyncio.Semaphore(max_concurrent)
+        return _semaphores[key]
+
+
+async def spawn_agent(
+    task: str,
+    role: str = "chat",
+    tier: int = 1,
+    timeout: int = 120,
+    max_rounds: int | None = None,
+) -> str:
+    """
+    Spawn a sub-agent to complete a task synchronously.
+
+    The sub-agent uses the model and tools assigned to the given role. Returns
+    the sub-agent's response as a string.
+    """
+    import model_registry
+    from context_loader import load_context
+    from auth_utils import get_user_role, get_tool_policy
+    from persona import get_user
+
+    user = get_user() or "scott"
+
+    role_cfg = model_registry.get_role_config(user, role)
+    model_cfg = model_registry.get_model_for_role(user, role)
+
+    if not model_cfg:
+        return f"spawn_agent: no model configured for role '{role}'"
+
+    model_type = model_cfg.get("type", "unknown")
+
+    if model_type not in ("local_openai", "gemini_api"):
+        return (
+            f"spawn_agent: model type '{model_type}' does not support tool-enabled sub-agents. "
+            f"Assign a local_openai or gemini_api model to role '{role}'."
+        )
+
+    # Determine concurrency key and semaphore limit
+    host_id = model_cfg.get("host_id")
+    if host_id:
+        registry = model_registry.get_registry(user)
+        host = next((h for h in registry.get("hosts", []) if h["id"] == host_id), None)
+        max_concurrent = (host or {}).get("max_concurrent", 3)
+        sem_key = f"host:{host_id}"
+    else:
+        max_concurrent = 5 if model_type == "gemini_api" else 3
+        sem_key = f"type:{model_type}"
+
+    sem = await _get_semaphore(sem_key, max_concurrent)
+
+    system_prompt = load_context(
+        tier=tier,
+        include_long=(tier >= 2),
+        include_mid=(tier >= 2),
+        include_short=(tier >= 2),
+        role_append=role_cfg.get("system_append", ""),
+        inject_datetime=role_cfg.get("inject_datetime", True),
+    )
+
+    user_role = get_user_role(user)
+    tool_list = role_cfg.get("tools")
+    policy = get_tool_policy(user)
+    confirm_allow = set(policy.get("allow", []))
+    confirm_deny = set(policy.get("deny", []))
+
+    if max_rounds is not None:
+        model_cfg = dict(model_cfg)
+        model_cfg["max_rounds"] = max_rounds
+
+    async def _run() -> str:
+        if model_type == "local_openai":
+            import openai_orchestrator
+            result = await openai_orchestrator.run(
+                task=task,
+                system_prompt=system_prompt,
+                model_cfg=model_cfg,
+                respond_with_final=True,
+                user_role=user_role,
+                tool_list=tool_list,
+                confirm_allow=confirm_allow,
+                confirm_deny=confirm_deny,
+            )
+            if result.checkpoint:
+                return (
+                    "Sub-agent requires user confirmation — "
+                    "confirmation gates are not supported inside spawn_agent. "
+                    "Pre-allow the tool in the user's tool policy or use a different role."
+                )
+            return result.response or "(sub-agent returned no output)"
+
+        # gemini_api
+        import orchestrator_engine
+        from auth_utils import get_user_gemini_key
+        gemini_key = model_cfg.get("api_key") or get_user_gemini_key(user)
+        result = await orchestrator_engine.run(
+            task=task,
+            system_prompt=system_prompt,
+            session_messages=None,
+            respond_with_claude=True,
+            gemini_api_key=gemini_key,
+            model_name=model_cfg.get("model_name"),
+            response_role=role,
+            user_role=user_role,
+            tool_list=tool_list,
+            confirm_allow=confirm_allow,
+            confirm_deny=confirm_deny,
+        )
+        if result.checkpoint:
+            return (
+                "Sub-agent requires user confirmation — "
+                "confirmation gates are not supported inside spawn_agent."
+            )
+        return result.response or "(sub-agent returned no output)"
+
+    async with sem:
+        try:
+            logger.info(
+                "spawn_agent: role=%s tier=%d timeout=%ds task=%.80s",
+                role, tier, timeout, task,
+            )
+            response = await asyncio.wait_for(_run(), timeout=float(timeout))
+            logger.info("spawn_agent: done role=%s response=%d chars", role, len(response))
+            return response
+        except asyncio.TimeoutError:
+            logger.warning("spawn_agent: timed out after %ds role=%s", timeout, role)
+            return f"Sub-agent timed out after {timeout}s (role={role})"
+        except Exception as e:
+            logger.exception("spawn_agent: failed role=%s", role)
+            return f"Sub-agent error ({role}): {e}"
+
+
+DECLARATIONS = [
+    types.FunctionDeclaration(
+        name="spawn_agent",
+        description=(
+            "Spawn a sub-agent to complete a task synchronously. "
+            "The sub-agent uses the model and tool set assigned to the given role. "
+            "Use for processing pipelines, parallel analysis, or delegating "
+            "specialized work (research, coding, data migration, etc.)."
+        ),
+        parameters=types.Schema(
+            type=types.Type.OBJECT,
+            properties={
+                "task": types.Schema(
+                    type=types.Type.STRING,
+                    description="The complete task description for the sub-agent.",
+                ),
+                "role": types.Schema(
+                    type=types.Type.STRING,
+                    description=(
+                        "Role determining the model and tools. "
+                        "E.g. 'research' for web lookups, 'coder' for code tasks, "
+                        "'distill' for summarization. Defaults to 'chat'."
+                    ),
+                ),
+                "tier": types.Schema(
+                    type=types.Type.INTEGER,
+                    description=(
+                        "Context tier: 1 = minimal (fast, identity only), "
+                        "2 = standard (+ memory), 3 = + last 2 session logs. "
+                        "Use 1 for pure processing tasks."
+                    ),
+                ),
+                "timeout": types.Schema(
+                    type=types.Type.INTEGER,
+                    description="Max seconds to wait (default 120).",
+                ),
+                "max_rounds": types.Schema(
+                    type=types.Type.INTEGER,
+                    description="Override max tool-loop iterations for this call.",
+                ),
+            },
+            required=["task"],
+        ),
+    )
+]
diff --git a/documentation/ARCH__FUTURE.md b/documentation/ARCH__FUTURE.md
index 919a488..e0fee82 100644
--- a/documentation/ARCH__FUTURE.md
+++ b/documentation/ARCH__FUTURE.md
@@ -46,7 +46,7 @@ Full API reference: [`docs/OPEN_WEBUI_API.md`](../docs/OPEN_WEBUI_API.md)
 
 ## 2. Orchestrator Tool Expansions
 
-**Status:** Ongoing. Current tool count: 44. Previously planned tools are all complete.
+**Status:** Ongoing. Current tool count: 45. Previously planned tools are all complete.
 
 ### Completed
 All originally planned tools are live: `cortex_restart`, `cortex_logs`, `http_fetch`,
@@ -58,10 +58,17 @@ All originally planned tools are live: `cortex_restart`, `cortex_logs`, `http_fe
 via `context_loader.py` (`--- System --- Current date and time: ...`). A dedicated
 `datetime_now` tool is not needed — the timestamp is always in context.
 
+### Completed Round 2
+| Tool | Notes |
+|---|---|
+| `session_search` | `tools/files.py` — full-text grep across session logs; params: `query`, `limit` (max 20); own sessions only via ContextVars. 2026-05-08 |
+| `reminders due dates` | `tools/reminders.py` — optional `due: YYYY-MM-DD` on `reminders_add`; `load_due_reminders()` suppresses future-dated entries from context. 2026-05-08 |
+| `spawn_agent` | `tools/agents.py` — sync sub-agent via role model; semaphore per host (`max_concurrent` in host schema); `asyncio.wait_for` timeout; admin-only. 2026-05-08 |
+
+### Remaining Round 2
+
 | Tool | Module | Priority | Description |
 |---|---|---|---|
-| `session_search` | new `search.py` or `files.py` | High | Full-text search across past session logs. The UI search already exists (`GET /sessions/search`) — this exposes it to the orchestrator so the agent can answer "what did we discuss about X last month?" |
-| `reminders due dates` | `reminders.py` | Medium | Add optional `due` field to `reminders_add`. Surface only due/overdue reminders in context rather than the full flat list. Makes reminders time-aware rather than always-on noise. |
 | `http_post` | `web.py` | Medium | POST to an external URL — for webhooks, REST APIs, form submissions. Requires a per-user host allowlist (same pattern as `email_send`) to prevent misuse. |
 | `nc_talk_history` | `notify.py` | Medium | Read recent messages from a Nextcloud Talk conversation. The bot can send but cannot read — adding read capability gives it full context before replying. |
 | `task_list` priority filter | `tasks.py` | Low | `task_list` accepts `status` but not `priority`. Add `priority` param so the agent can ask "what are my high-priority tasks?" without returning everything. |
diff --git a/documentation/TODO__Agents.md b/documentation/TODO__Agents.md
index 3735b8c..587a4a5 100644
--- a/documentation/TODO__Agents.md
+++ b/documentation/TODO__Agents.md
@@ -81,6 +81,12 @@ system prompt by `context_loader.py` at all tiers.
   - `context_loader.py` calls `load_due_reminders()` — future-dated sections suppressed until due
   - `reminders_list` shows `[OVERDUE]`, `[due TODAY]`, or `[due: YYYY-MM-DD]` per entry
   - Backward compatible — existing undated reminders always surface as before
+- [x] **`spawn_agent`** — spawn a synchronous sub-agent using any role's model + tools — 2026-05-08
+  - `cortex/tools/agents.py` — `spawn_agent(task, role, tier, timeout, max_rounds)`
+  - Per-host asyncio semaphore keyed by `host_id` (or model type for cloud); `max_concurrent` field in host schema
+  - Supports `local_openai` and `gemini_api` model types; returns error string for others
+  - Admin-only tool (powerful — can spawn arbitrarily long sub-tasks)
+  - Host UI: "Max parallel" number input in host edit/add forms
 - [ ] **`http_post`** — POST to external URLs
   - Params: `url: str`, `body: dict | str`, `headers: dict | None`
   - Per-user host allowlist in `home/{user}/http_allowlist.json` (same pattern as email)