From 1cc7988953256a674258b97be4831336eb63c270 Mon Sep 17 00:00:00 2001 From: Scott Idem Date: Tue, 28 Apr 2026 20:29:46 -0400 Subject: [PATCH] feat: add shell_exec tool and fix orchestrator model name resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add shell_exec to orchestrator tool suite (system.py + __init__.py) Runs arbitrary shell commands on the Cortex host with timeout (1–120s), combined stdout/stderr output, optional working_dir, and exit code reporting. Enables system diagnostics (df, ls, ps, journalctl, etc.) from Agent mode. - Fix orchestrator_engine.run() to use model_name from resolved registry entry Previously used settings.orchestrator_model (.env hardcode) regardless of what model was assigned to the orchestrator role. Now accepts model_name param and falls back to settings value only when registry has no model_name. - Update ARCH__FUTURE.md: date, running host, local orchestrator status, model registry V2 progress, added Cortex Mesh concept (section 9) Co-Authored-By: Claude Sonnet 4.6 --- cortex/orchestrator_engine.py | 3 ++- cortex/routers/orchestrator.py | 1 + cortex/tools/__init__.py | 33 ++++++++++++++++++++++++++- cortex/tools/system.py | 41 ++++++++++++++++++++++++++++++++++ documentation/ARCH__FUTURE.md | 29 ++++++++++++++++++++---- 5 files changed, 101 insertions(+), 6 deletions(-) diff --git a/cortex/orchestrator_engine.py b/cortex/orchestrator_engine.py index e1b00f4..67271e7 100644 --- a/cortex/orchestrator_engine.py +++ b/cortex/orchestrator_engine.py @@ -57,6 +57,7 @@ async def run( session_messages: list[dict] | None = None, respond_with_claude: bool = True, gemini_api_key: str | None = None, + model_name: str | None = None, ) -> OrchestratorResult: """ Run the full orchestration loop for a task. @@ -96,7 +97,7 @@ async def run( response = await asyncio.to_thread( client.models.generate_content, - model=settings.orchestrator_model, + model=model_name or settings.orchestrator_model, contents=contents, config=types.GenerateContentConfig( tools=TOOL_DECLARATIONS, diff --git a/cortex/routers/orchestrator.py b/cortex/routers/orchestrator.py index 08961d0..30863c9 100644 --- a/cortex/routers/orchestrator.py +++ b/cortex/routers/orchestrator.py @@ -183,6 +183,7 @@ async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None: session_messages=session_messages, respond_with_claude=req.respond_with_claude, gemini_api_key=gemini_key, + model_name=orch_model.get("model_name") if orch_model else None, ) # Save the turn to the session store so it survives a page refresh diff --git a/cortex/tools/__init__.py b/cortex/tools/__init__.py index 6677848..7bd3369 100644 --- a/cortex/tools/__init__.py +++ b/cortex/tools/__init__.py @@ -20,7 +20,7 @@ from tools.ae_knowledge import journal_search as _ae_journal_search from tools.ae_knowledge import journal_entry_create as _ae_journal_entry_create from tools.ae_tasks import task_list as _ae_task_list from tools.files import file_read as _file_read -from tools.system import claude_allow_dir as _claude_allow_dir +from tools.system import claude_allow_dir as _claude_allow_dir, shell_exec as _shell_exec from tools.tasks import task_list as _task_list, task_create as _task_create from tools.tasks import task_update as _task_update, task_complete as _task_complete from tools.cron import ( @@ -192,6 +192,7 @@ _CALLABLES: dict[str, callable] = { "ae_task_list": _ae_task_list, "file_read": _file_read, "claude_allow_dir": _claude_allow_dir, + "shell_exec": _shell_exec, "task_list": _task_list, "task_create": _task_create, "task_update": _task_update, @@ -236,6 +237,35 @@ _claude_allow_dir_declaration = types.FunctionDeclaration( ), ) +_shell_exec_declaration = types.FunctionDeclaration( + name="shell_exec", + description=( + "Execute a shell command on the Cortex host machine and return its output. " + "Use for system diagnostics: disk usage (df -h), process status (ps aux), " + "directory listings (ls), memory (free -h), uptime, network info, log tails, etc. " + "Commands run as the Cortex service user. Timeout enforced (default 30s, max 120s). " + "Avoid destructive commands — prefer read-only system queries." + ), + parameters=types.Schema( + type=types.Type.OBJECT, + properties={ + "command": types.Schema( + type=types.Type.STRING, + description="Shell command to run (e.g. 'df -h', 'ls ~/agents_sync/', 'journalctl --user -u cortex -n 50')", + ), + "working_dir": types.Schema( + type=types.Type.STRING, + description="Optional working directory (e.g. '~/agents_sync/projects'). Defaults to home directory.", + ), + "timeout": types.Schema( + type=types.Type.INTEGER, + description="Timeout in seconds (default 30, max 120)", + ), + }, + required=["command"], + ), +) + _task_list_declaration = types.FunctionDeclaration( name="task_list", description=( @@ -526,6 +556,7 @@ TOOL_DECLARATIONS = [ _ae_task_list_declaration, _file_read_declaration, _claude_allow_dir_declaration, + _shell_exec_declaration, _task_list_declaration, _task_create_declaration, _task_update_declaration, diff --git a/cortex/tools/system.py b/cortex/tools/system.py index 20bb2c2..48dfbf6 100644 --- a/cortex/tools/system.py +++ b/cortex/tools/system.py @@ -6,6 +6,7 @@ These tools affect the host system directly. Use with care. import asyncio import logging +import os logger = logging.getLogger(__name__) @@ -42,3 +43,43 @@ async def claude_allow_dir(path: str, mode: str = "rw") -> str: except Exception as e: logger.error("claude_allow_dir error: %s", e) return f"Error: {e}" + + +async def shell_exec(command: str, working_dir: str | None = None, timeout: int = 30) -> str: + """Execute a shell command on the Cortex host and return combined stdout/stderr.""" + timeout = min(max(timeout, 1), 120) + + cwd = None + if working_dir: + cwd = os.path.expanduser(working_dir) + if not os.path.isdir(cwd): + return f"Error: working_dir '{working_dir}' does not exist or is not a directory" + + try: + proc = await asyncio.create_subprocess_shell( + command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=cwd, + ) + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout) + + out = stdout.decode(errors="replace").strip() + err = stderr.decode(errors="replace").strip() + + parts = [] + if out: + parts.append(out) + if err: + parts.append(f"[stderr]\n{err}") + combined = "\n".join(parts) if parts else "(no output)" + + if proc.returncode != 0: + return f"Exit {proc.returncode}:\n{combined}" + return combined + + except asyncio.TimeoutError: + return f"Error: command timed out after {timeout}s" + except Exception as e: + logger.error("shell_exec error: %s", e) + return f"Error: {e}" diff --git a/documentation/ARCH__FUTURE.md b/documentation/ARCH__FUTURE.md index cbac652..d97f642 100644 --- a/documentation/ARCH__FUTURE.md +++ b/documentation/ARCH__FUTURE.md @@ -1,7 +1,7 @@ # Architecture: Planned Features > What's next and how it's designed to work. -> Last updated: 2026-04-04 +> Last updated: 2026-04-28 For the current task list see `TODO__Agents.md`. For phases and priorities see `ROADMAP.md`. @@ -9,7 +9,7 @@ For the current task list see `TODO__Agents.md`. For phases and priorities see ` ## 1. Local Orchestrator -**Status:** High priority — design complete, not yet built. +**Status:** Partially built — `openai_orchestrator.py` exists and is wired into `POST /orchestrate`. If the `orchestrator` role in the model registry resolves to a `local_openai` model, it routes there automatically. Full parity with the Gemini orchestrator (tool loop quality, error handling, context budget enforcement) is still in progress. Same ReAct tool loop as the Gemini API orchestrator, but driven by a local model via Open WebUI's OpenAI-compatible API. Enables offline/private agent tasks with no API cost. @@ -124,7 +124,7 @@ AE Journals becomes the searchable long-term knowledge base. Complements memory ## 5. Intelligent Model Routing -**Status:** Deferred. Currently user-toggled. +**Status:** Partially addressed. Model Registry V2 (2026-04-27) introduced role-based routing — `chat`, `orchestrator`, `distill`, `coder`, `research` roles each have their own primary/backup model chain, and the UI role toggle lets users manually select which role handles a message. Automatic task-characteristic routing (below) is still deferred. Route automatically based on task characteristics rather than requiring manual backend selection: @@ -183,10 +183,31 @@ The Claude Code system prompt was leaked in early April 2026. Two reimplementati **Status:** Deferred. -Currently running on `scott_lpt` (main laptop). Long-term target: home server (always-on, Docker). +Currently running on `scott-lt-i7-rtx` (gaming/agents laptop). Disabled on `scott_lpt` (2026-04-28) — that machine is a dev/editing node only. Long-term target: home server (always-on, Docker). `docker-compose.yml` already exists in the project root. Deployment path: 1. Copy to home server 2. Configure reverse proxy (Nginx, already Docker-hosted) 3. Set subdomain `cortex.dgrzone.com` → home server internal IP 4. WireGuard required for all access — not internet-exposed + +--- + +## 9. Cortex Mesh (Multi-Instance Fleet) + +**Status:** Concept — no design yet. + +Rather than a single Cortex instance, each device in the fleet runs its own instance with its own persona(s), local models, and capabilities. Instances can delegate tasks to each other based on available resources and roles. + +**Use cases:** +- `scott_lpt` (edit/dev node) delegates code tasks to `scott-lt-i7-rtx` (GPU/Ollama host) +- A background cron on one instance triggers an orchestrated task on another +- Each instance has its own "best available" model — mesh routing picks the right node automatically + +**Design questions to resolve:** +- Auth between instances (shared JWT secret vs. per-instance API keys) +- How instances advertise capabilities (model registry over HTTP? shared Syncthing file?) +- Whether `ae_send_message` / the existing inbox system is the right coordination layer or if a dedicated Cortex-to-Cortex protocol is needed +- Session continuity — does a conversation that starts on one node stay there, or can it migrate? + +The Syncthing-synced `home/` directory and shared `model_registry.json` already provide a natural foundation — instances share persona memory and context without a central DB.