feat: tool schema optimization, keyword routing, aider_run coding agent

Tool schema optimization (PLAN__Tool_Schema_Optimization.md Phases 1-3):
- model_registry.py: ROLE_DEFAULT_TOOLS — distill gets [], research/coder get narrow tool lists by default; applied in get_role_config() when user hasn't configured a custom list
- openai_orchestrator.py: keyword routing via narrow_tools_by_keywords() — scans user message + last assistant turn; narrows active schemas to matched categories only (e.g. "weather" → 3 web tools instead of 69); zero tools sent for pure chat
- openai_orchestrator.py: _get_cached_tools() — module-level schema cache keyed by (role, sorted_tool_list, risk_params); eliminates redundant schema rebuilds
- openai_orchestrator.py: _TOOL_SCHEMA_OVERHEAD 3000 → 500 tokens (schemas now excluded from the per-call fixed estimate since they're cached separately)
- tools/__init__.py: CATEGORY_TOOL_MAP + _KEYWORD_CATEGORY_MAP + classify_tool_categories() + narrow_tools_by_keywords() — the classifier logic lives here so both orchestrators can share it

aider_run tool (cortex/tools/aider.py):
- Invokes Aider as a subprocess with --message --yes-always --no-pretty --no-stream
- Project aliases: cortex / aether_api / aether_frontend / aether_container
- Auto-injects OpenRouter API key from Cortex model registry (no ~/.env needed)
- background=True fires async + registers in agent_manager; notify=True sends push notification on completion
- admin-only, confirm-required, TOOL_RISK=high
- .gitignore: added .aider.chat.history.md / .aider.input.history / .aider.llm.history

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-03 22:39:44 -04:00
parent 29940c299b
commit 29d8aa4aae
6 changed files with 830 additions and 10 deletions
--- a/cortex/openai_orchestrator.py
+++ b/cortex/openai_orchestrator.py
@@ -25,7 +25,7 @@ from openai import AsyncOpenAI, APIConnectionError, APIStatusError

 from config import settings
 from orchestrator_engine import OrchestrateCheckpoint, OrchestratorResult
-from tools import OPENAI_TOOL_SCHEMAS, call_tool, get_openai_tools_for_role, get_tools_for_role, CONFIRM_REQUIRED
+from tools import OPENAI_TOOL_SCHEMAS, call_tool, get_openai_tools_for_role, get_tools_for_role, CONFIRM_REQUIRED, narrow_tools_by_keywords
 import tool_audit

 logger = logging.getLogger(__name__)
@@ -76,8 +76,18 @@ async def run(
    _confirm_deny = frozenset(confirm_deny or ())
    effective_confirm = (CONFIRM_REQUIRED - set(_confirm_allow)) | set(_confirm_deny)

+    # Keyword routing: narrow schemas to only what this message needs.
+    # Also scans the last assistant turn so follow-ups like "yes, do that" inherit tool context.
+    # Returns [] when no keywords match (zero tool overhead — model responds as plain chat).
+    effective_tool_list = narrow_tools_by_keywords(task, tool_list, context_messages=session_messages)
+    logger.info(
+        "Keyword routing: %d tools active (role_tools=%s)",
+        len(effective_tool_list),
+        len(tool_list) if tool_list is not None else "all",
+    )
+
    client, model_name, active_tools = _build_client(
-        model_cfg, user_role, tool_list,
+        model_cfg, user_role, effective_tool_list,
        max_risk=max_risk, risk_whitelist=risk_whitelist, risk_blacklist=risk_blacklist,
    )
    tool_audit.set_context("openai", model_cfg.get("label") or model_name)
@@ -104,7 +114,7 @@ async def run(
        model_cfg=model_cfg,
        respond_with_final=respond_with_final,
        user_role=user_role,
-        tool_list=tool_list,
+        tool_list=effective_tool_list,
        confirm_allow=_confirm_allow,
        confirm_deny=_confirm_deny,
        starting_round=0,
@@ -198,13 +208,39 @@ async def resume(checkpoint: OrchestrateCheckpoint, confirmed: bool) -> Orchestr


 _CHARS_PER_TOKEN = 4
-# Fixed token overhead budget for sending 40 tool schemas per call
-_TOOL_SCHEMA_OVERHEAD = 3_000
+# Fixed token overhead budget per call (tool schemas excluded — cached separately)
+_TOOL_SCHEMA_OVERHEAD = 500
 # Chars to keep per truncated old tool result
 _TRUNC_RESULT_CHARS = 400
 # Always keep the last N tool-result messages uncompacted
 _KEEP_RECENT_TOOL_MSGS = 6  # ~2 rounds of 3 tools each

+# Module-level schema cache: key = "|"-joined string of user_role, sorted tool list, and risk params
+# Bounded in practice — keyword routing produces at most ~30 distinct tool sets.
+_tool_schema_cache: dict[str, list[dict]] = {}
+
+
+def _get_cached_tools(
+    user_role: str,
+    tool_list: list[str] | None,
+    max_risk: str | None = None,
+    whitelist: list[str] | None = None,
+    blacklist: list[str] | None = None,
+) -> list[dict]:
+    """Return the OpenAI tool schemas for this configuration, building them at most once.
+
+    Results are memoised in the module-level ``_tool_schema_cache`` under a
+    string key derived from every argument. On a cache miss the schemas are
+    built by ``get_openai_tools_for_role`` with the same arguments and stored.
+
+    Args:
+        user_role: Forwarded to ``get_openai_tools_for_role``; first key component.
+        tool_list: Tool names to build schemas for. ``None`` is keyed as
+            ``"all"`` and is therefore distinct from an empty list.
+        max_risk: Forwarded as ``max_risk``; keyed via ``str()``.
+        whitelist: Forwarded as ``whitelist``. ``None`` and an empty list
+            share the same cache key.
+        blacklist: Forwarded as ``blacklist``. ``None`` and an empty list
+            share the same cache key.
+
+    Returns:
+        The cached list object itself (not a copy); repeated calls with an
+        equivalent configuration return the same object.
+        NOTE(review): callers presumably must not mutate it in place — confirm.
+    """
+    # Sorting makes the key independent of the order in which names were passed.
+    key = "|".join([
+        user_role,
+        str(sorted(tool_list) if tool_list is not None else "all"),
+        str(max_risk),
+        str(sorted(whitelist) if whitelist else ""),
+        str(sorted(blacklist) if blacklist else ""),
+    ])
+    # Build on first use only; this function never evicts or invalidates entries.
+    if key not in _tool_schema_cache:
+        _tool_schema_cache[key] = get_openai_tools_for_role(
+            user_role, tool_list,
+            max_risk=max_risk, whitelist=whitelist, blacklist=blacklist,
+        )
+    return _tool_schema_cache[key]
+

 def _estimate_tokens(messages: list[dict]) -> int:
    total = sum(len(json.dumps(m)) for m in messages)
@@ -448,7 +484,7 @@ def _build_client(
    if model_cfg.get("tools") is False:
        active_tools = []
    else:
-        active_tools = get_openai_tools_for_role(
+        active_tools = _get_cached_tools(
            user_role, tool_list,
            max_risk=max_risk, whitelist=risk_whitelist, blacklist=risk_blacklist,
        )