feat: tool schema optimization, keyword routing, aider_run coding agent
Tool schema optimization (PLAN__Tool_Schema_Optimization.md Phases 1-3): - model_registry.py: ROLE_DEFAULT_TOOLS — distill gets [], research/coder get narrow tool lists by default; applied in get_role_config() when user hasn't configured a custom list - openai_orchestrator.py: keyword routing via narrow_tools_by_keywords() — scans user message + last assistant turn; narrows active schemas to matched categories only (e.g. "weather" → 3 web tools instead of 69); zero tools sent for pure chat - openai_orchestrator.py: _get_cached_tools() — module-level schema cache keyed by (role, sorted_tool_list, risk_params); eliminates redundant schema rebuilds - openai_orchestrator.py: _TOOL_SCHEMA_OVERHEAD 3000 → 500 tokens (schemas now excluded from the per-call fixed estimate since they're cached separately) - tools/__init__.py: CATEGORY_TOOL_MAP + _KEYWORD_CATEGORY_MAP + classify_tool_categories() + narrow_tools_by_keywords() — the classifier logic lives here so both orchestrators can share it aider_run tool (cortex/tools/aider.py): - Invokes Aider as a subprocess with --message --yes-always --no-pretty --no-stream - Project aliases: cortex / aether_api / aether_frontend / aether_container - Auto-injects OpenRouter API key from Cortex model registry (no ~/.env needed) - background=True fires async + registers in agent_manager; notify=True sends push notification on completion - admin-only, confirm-required, TOOL_RISK=high - .gitignore: added .aider.chat.history.md / .aider.input.history / .aider.llm.history Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -25,5 +25,11 @@ tmp/
|
|||||||
*.tmp
|
*.tmp
|
||||||
*.log
|
*.log
|
||||||
|
|
||||||
|
# Aider — history files are personal/ephemeral; .aider.conf.yml is project config and IS tracked
|
||||||
|
.aider.chat.history.md
|
||||||
|
.aider.input.history
|
||||||
|
.aider.llm.history
|
||||||
|
|
||||||
# System files
|
# System files
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
.aider*
|
||||||
|
|||||||
@@ -81,6 +81,24 @@ from config import settings
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Role-level tool defaults ───────────────────────────────────────────────────
|
||||||
|
# Applied when a user hasn't configured a custom tool list for a role.
|
||||||
|
# None = no restriction (all accessible tools); [] = no tools (pure text processing).
|
||||||
|
# "chat" is intentionally absent: the /chat endpoint never sends tool schemas anyway,
|
||||||
|
# and the orchestrator uses chat_role="chat" as its default — restricting it here
|
||||||
|
# would block all tools from every default orchestration request.
|
||||||
|
# "orchestrator" is intentionally absent — Phase 2 keyword routing narrows it per message.
|
||||||
|
ROLE_DEFAULT_TOOLS: dict[str, list[str] | None] = {
|
||||||
|
"distill": [], # pure text processing — no tools needed
|
||||||
|
"research": ["web_search", "web_read", "http_fetch"],
|
||||||
|
"coder": [
|
||||||
|
"project_file_read", "project_file_list", "file_stat", "file_grep",
|
||||||
|
"file_diff", "file_syntax_check", "file_read", "file_list", "file_write",
|
||||||
|
"git_status", "git_log", "git_diff", "shell_exec",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# ── Provider model catalogs ───────────────────────────────────────────────────
|
# ── Provider model catalogs ───────────────────────────────────────────────────
|
||||||
# Server-side defaults. Update here when providers release new models.
|
# Server-side defaults. Update here when providers release new models.
|
||||||
# Users can add entries via the settings UI (Phase 2).
|
# Users can add entries via the settings UI (Phase 2).
|
||||||
@@ -482,9 +500,16 @@ def get_role_config(username: str, role: str) -> dict:
|
|||||||
"""
|
"""
|
||||||
registry = _load(username)
|
registry = _load(username)
|
||||||
role_cfg = registry.get("roles", {}).get(role, {})
|
role_cfg = registry.get("roles", {}).get(role, {})
|
||||||
|
user_tools = role_cfg.get("tools")
|
||||||
|
if user_tools is None:
|
||||||
|
# No user-configured list — fall back to system defaults for this role
|
||||||
|
effective_tools: list[str] | None = ROLE_DEFAULT_TOOLS.get(role)
|
||||||
|
else:
|
||||||
|
# User has configured tools; preserve their setting (empty list → no restriction)
|
||||||
|
effective_tools = user_tools or None
|
||||||
return {
|
return {
|
||||||
"system_append": role_cfg.get("system_append", ""),
|
"system_append": role_cfg.get("system_append", ""),
|
||||||
"tools": role_cfg.get("tools") or None,
|
"tools": effective_tools,
|
||||||
"inject_datetime": role_cfg.get("inject_datetime", True),
|
"inject_datetime": role_cfg.get("inject_datetime", True),
|
||||||
"inject_mode": role_cfg.get("inject_mode", True),
|
"inject_mode": role_cfg.get("inject_mode", True),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ from openai import AsyncOpenAI, APIConnectionError, APIStatusError
|
|||||||
|
|
||||||
from config import settings
|
from config import settings
|
||||||
from orchestrator_engine import OrchestrateCheckpoint, OrchestratorResult
|
from orchestrator_engine import OrchestrateCheckpoint, OrchestratorResult
|
||||||
from tools import OPENAI_TOOL_SCHEMAS, call_tool, get_openai_tools_for_role, get_tools_for_role, CONFIRM_REQUIRED
|
from tools import OPENAI_TOOL_SCHEMAS, call_tool, get_openai_tools_for_role, get_tools_for_role, CONFIRM_REQUIRED, narrow_tools_by_keywords
|
||||||
import tool_audit
|
import tool_audit
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -76,8 +76,18 @@ async def run(
|
|||||||
_confirm_deny = frozenset(confirm_deny or ())
|
_confirm_deny = frozenset(confirm_deny or ())
|
||||||
effective_confirm = (CONFIRM_REQUIRED - set(_confirm_allow)) | set(_confirm_deny)
|
effective_confirm = (CONFIRM_REQUIRED - set(_confirm_allow)) | set(_confirm_deny)
|
||||||
|
|
||||||
|
# Keyword routing: narrow schemas to only what this message needs.
|
||||||
|
# Also scans the last assistant turn so follow-ups like "yes, do that" inherit tool context.
|
||||||
|
# Returns [] when no keywords match (zero tool overhead — model responds as plain chat).
|
||||||
|
effective_tool_list = narrow_tools_by_keywords(task, tool_list, context_messages=session_messages)
|
||||||
|
logger.info(
|
||||||
|
"Keyword routing: %d tools active (role_tools=%s)",
|
||||||
|
len(effective_tool_list),
|
||||||
|
len(tool_list) if tool_list is not None else "all",
|
||||||
|
)
|
||||||
|
|
||||||
client, model_name, active_tools = _build_client(
|
client, model_name, active_tools = _build_client(
|
||||||
model_cfg, user_role, tool_list,
|
model_cfg, user_role, effective_tool_list,
|
||||||
max_risk=max_risk, risk_whitelist=risk_whitelist, risk_blacklist=risk_blacklist,
|
max_risk=max_risk, risk_whitelist=risk_whitelist, risk_blacklist=risk_blacklist,
|
||||||
)
|
)
|
||||||
tool_audit.set_context("openai", model_cfg.get("label") or model_name)
|
tool_audit.set_context("openai", model_cfg.get("label") or model_name)
|
||||||
@@ -104,7 +114,7 @@ async def run(
|
|||||||
model_cfg=model_cfg,
|
model_cfg=model_cfg,
|
||||||
respond_with_final=respond_with_final,
|
respond_with_final=respond_with_final,
|
||||||
user_role=user_role,
|
user_role=user_role,
|
||||||
tool_list=tool_list,
|
tool_list=effective_tool_list,
|
||||||
confirm_allow=_confirm_allow,
|
confirm_allow=_confirm_allow,
|
||||||
confirm_deny=_confirm_deny,
|
confirm_deny=_confirm_deny,
|
||||||
starting_round=0,
|
starting_round=0,
|
||||||
@@ -198,13 +208,39 @@ async def resume(checkpoint: OrchestrateCheckpoint, confirmed: bool) -> Orchestr
|
|||||||
|
|
||||||
|
|
||||||
_CHARS_PER_TOKEN = 4
|
_CHARS_PER_TOKEN = 4
|
||||||
# Fixed token overhead budget for sending 40 tool schemas per call
|
# Fixed token overhead budget per call (tool schemas excluded — cached separately)
|
||||||
_TOOL_SCHEMA_OVERHEAD = 3_000
|
_TOOL_SCHEMA_OVERHEAD = 500
|
||||||
# Chars to keep per truncated old tool result
|
# Chars to keep per truncated old tool result
|
||||||
_TRUNC_RESULT_CHARS = 400
|
_TRUNC_RESULT_CHARS = 400
|
||||||
# Always keep the last N tool-result messages uncompacted
|
# Always keep the last N tool-result messages uncompacted
|
||||||
_KEEP_RECENT_TOOL_MSGS = 6 # ~2 rounds of 3 tools each
|
_KEEP_RECENT_TOOL_MSGS = 6 # ~2 rounds of 3 tools each
|
||||||
|
|
||||||
|
# Module-level schema cache: key = (user_role, sorted_tools, risk_params),
# flattened into a single "|"-joined string by _get_cached_tools().
# Entries are never evicted. The original author expects this to stay small
# (keyword routing producing at most ~30 distinct tool sets) — NOTE(review):
# that bound is not enforced anywhere in this module.
_tool_schema_cache: dict[str, list[dict]] = {}


def _get_cached_tools(
    user_role: str,
    tool_list: list[str] | None,
    max_risk: str | None = None,
    whitelist: list[str] | None = None,
    blacklist: list[str] | None = None,
) -> list[dict]:
    """Return the OpenAI tool schemas for this role/filter combination, memoized.

    The first call for a given combination delegates to
    ``get_openai_tools_for_role`` and stores the result in
    ``_tool_schema_cache``; later calls with an equivalent combination reuse it.

    Args:
        user_role: Role name forwarded to ``get_openai_tools_for_role``.
        tool_list: Allow-list of tool names. ``None`` is keyed as "all" and is
            distinct from an empty list.
        max_risk: Risk ceiling forwarded unchanged.
        whitelist: Risk whitelist forwarded unchanged. ``None`` and an empty
            list share one cache key.
        blacklist: Risk blacklist forwarded unchanged. ``None`` and an empty
            list share one cache key.

    Returns:
        A new list on every call, so callers may append to or reorder it
        without corrupting the cache. The schema dicts inside the list are
        shared with the cache and must be treated as read-only.
    """
    # Name lists are sorted so the key does not depend on caller ordering.
    key = "|".join([
        user_role,
        str(sorted(tool_list) if tool_list is not None else "all"),
        str(max_risk),
        str(sorted(whitelist) if whitelist else ""),
        str(sorted(blacklist) if blacklist else ""),
    ])

    cached = _tool_schema_cache.get(key)
    if cached is None:
        cached = get_openai_tools_for_role(
            user_role, tool_list,
            max_risk=max_risk, whitelist=whitelist, blacklist=blacklist,
        )
        _tool_schema_cache[key] = cached

    # Shallow copy: hand out a fresh list, as the uncached call used to do.
    return list(cached)
|
||||||
|
|
||||||
|
|
||||||
def _estimate_tokens(messages: list[dict]) -> int:
|
def _estimate_tokens(messages: list[dict]) -> int:
|
||||||
total = sum(len(json.dumps(m)) for m in messages)
|
total = sum(len(json.dumps(m)) for m in messages)
|
||||||
@@ -448,7 +484,7 @@ def _build_client(
|
|||||||
if model_cfg.get("tools") is False:
|
if model_cfg.get("tools") is False:
|
||||||
active_tools = []
|
active_tools = []
|
||||||
else:
|
else:
|
||||||
active_tools = get_openai_tools_for_role(
|
active_tools = _get_cached_tools(
|
||||||
user_role, tool_list,
|
user_role, tool_list,
|
||||||
max_risk=max_risk, whitelist=risk_whitelist, blacklist=risk_blacklist,
|
max_risk=max_risk, whitelist=risk_whitelist, blacklist=risk_blacklist,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -87,7 +87,13 @@ from tools.git import (
|
|||||||
git_log as _git_log,
|
git_log as _git_log,
|
||||||
git_diff as _git_diff,
|
git_diff as _git_diff,
|
||||||
)
|
)
|
||||||
from tools.agents import spawn_agent as _spawn_agent
|
from tools.agents import (
|
||||||
|
spawn_agent as _spawn_agent,
|
||||||
|
agent_status as _agent_status,
|
||||||
|
agent_list as _agent_list,
|
||||||
|
agent_cancel as _agent_cancel,
|
||||||
|
)
|
||||||
|
from tools.aider import aider_run as _aider_run
|
||||||
from tools.homeassistant import (
|
from tools.homeassistant import (
|
||||||
ha_get_state as _ha_get_state,
|
ha_get_state as _ha_get_state,
|
||||||
ha_get_states as _ha_get_states,
|
ha_get_states as _ha_get_states,
|
||||||
@@ -114,6 +120,7 @@ import tools.notify as _mod_notify
|
|||||||
import tools.agent_notes as _mod_agent_notes
|
import tools.agent_notes as _mod_agent_notes
|
||||||
import tools.git as _mod_git
|
import tools.git as _mod_git
|
||||||
import tools.agents as _mod_agents
|
import tools.agents as _mod_agents
|
||||||
|
import tools.aider as _mod_aider
|
||||||
import tools.homeassistant as _mod_homeassistant
|
import tools.homeassistant as _mod_homeassistant
|
||||||
import tools.ae_database as _mod_ae_database
|
import tools.ae_database as _mod_ae_database
|
||||||
|
|
||||||
@@ -140,7 +147,7 @@ TOOL_CATEGORIES: dict[str, list[str]] = {
|
|||||||
],
|
],
|
||||||
"Aether Tasks": ["ae_task_list"],
|
"Aether Tasks": ["ae_task_list"],
|
||||||
"Agent Notes": ["agent_notes_read", "agent_notes_write", "agent_notes_append", "agent_notes_clear"],
|
"Agent Notes": ["agent_notes_read", "agent_notes_write", "agent_notes_append", "agent_notes_clear"],
|
||||||
"Agents": ["spawn_agent"],
|
"Agents": ["spawn_agent", "agent_status", "agent_list", "agent_cancel", "aider_run"],
|
||||||
"Home Assistant": ["ha_get_state", "ha_get_states", "ha_call_service"],
|
"Home Assistant": ["ha_get_state", "ha_get_states", "ha_call_service"],
|
||||||
"Aether Database": ["ae_db_query", "ae_db_describe", "ae_db_show_view"],
|
"Aether Database": ["ae_db_query", "ae_db_describe", "ae_db_show_view"],
|
||||||
}
|
}
|
||||||
@@ -207,6 +214,10 @@ _CALLABLES: dict[str, callable] = {
|
|||||||
"git_log": _git_log,
|
"git_log": _git_log,
|
||||||
"git_diff": _git_diff,
|
"git_diff": _git_diff,
|
||||||
"spawn_agent": _spawn_agent,
|
"spawn_agent": _spawn_agent,
|
||||||
|
"agent_status": _agent_status,
|
||||||
|
"agent_list": _agent_list,
|
||||||
|
"agent_cancel": _agent_cancel,
|
||||||
|
"aider_run": _aider_run,
|
||||||
"ha_get_state": _ha_get_state,
|
"ha_get_state": _ha_get_state,
|
||||||
"ha_get_states": _ha_get_states,
|
"ha_get_states": _ha_get_states,
|
||||||
"ha_call_service": _ha_call_service,
|
"ha_call_service": _ha_call_service,
|
||||||
@@ -230,6 +241,10 @@ TOOL_ROLES: dict[str, str] = {
|
|||||||
"file_write": "admin",
|
"file_write": "admin",
|
||||||
"ae_task_list": "admin",
|
"ae_task_list": "admin",
|
||||||
"spawn_agent": "admin",
|
"spawn_agent": "admin",
|
||||||
|
"agent_status": "user",
|
||||||
|
"agent_list": "user",
|
||||||
|
"agent_cancel": "admin",
|
||||||
|
"aider_run": "admin",
|
||||||
"email_send": "admin",
|
"email_send": "admin",
|
||||||
"nc_talk_send": "admin",
|
"nc_talk_send": "admin",
|
||||||
"http_post": "admin",
|
"http_post": "admin",
|
||||||
@@ -251,6 +266,8 @@ CONFIRM_REQUIRED: set[str] = {
|
|||||||
"http_post",
|
"http_post",
|
||||||
"ha_call_service",
|
"ha_call_service",
|
||||||
"ae_journal_entry_disable", # disables a journal entry — not easily reversed
|
"ae_journal_entry_disable", # disables a journal entry — not easily reversed
|
||||||
|
"agent_cancel", # kills a running background task
|
||||||
|
"aider_run", # edits files and commits — irreversible without git revert
|
||||||
}
|
}
|
||||||
|
|
||||||
# Security risk ratings — informational for now; will drive auto-allow tiers later.
|
# Security risk ratings — informational for now; will drive auto-allow tiers later.
|
||||||
@@ -348,8 +365,12 @@ TOOL_RISK: dict[str, str] = {
|
|||||||
"git_log": "low",
|
"git_log": "low",
|
||||||
"git_diff": "low",
|
"git_diff": "low",
|
||||||
|
|
||||||
# Agents — spawning a subprocess with broad permissions is high
|
# Agents — spawning is high; lifecycle reads are low; cancel is medium (kills a task)
|
||||||
"spawn_agent": "high",
|
"spawn_agent": "high",
|
||||||
|
"agent_status": "low",
|
||||||
|
"agent_list": "low",
|
||||||
|
"agent_cancel": "medium",
|
||||||
|
"aider_run": "high",
|
||||||
|
|
||||||
# Home Assistant — reads are low; controlling physical devices is high
|
# Home Assistant — reads are low; controlling physical devices is high
|
||||||
"ha_get_state": "low",
|
"ha_get_state": "low",
|
||||||
@@ -388,6 +409,7 @@ _ALL_DECLARATIONS: list[types.FunctionDeclaration] = (
|
|||||||
+ _mod_ae_tasks.DECLARATIONS
|
+ _mod_ae_tasks.DECLARATIONS
|
||||||
+ _mod_agent_notes.DECLARATIONS
|
+ _mod_agent_notes.DECLARATIONS
|
||||||
+ _mod_agents.DECLARATIONS
|
+ _mod_agents.DECLARATIONS
|
||||||
|
+ _mod_aider.DECLARATIONS
|
||||||
+ _mod_homeassistant.DECLARATIONS
|
+ _mod_homeassistant.DECLARATIONS
|
||||||
+ _mod_ae_database.DECLARATIONS
|
+ _mod_ae_database.DECLARATIONS
|
||||||
)
|
)
|
||||||
@@ -554,3 +576,114 @@ def get_openai_tools_for_role(
|
|||||||
if tool_list is not None:
|
if tool_list is not None:
|
||||||
allowed &= set(tool_list)
|
allowed &= set(tool_list)
|
||||||
return [t for t in OPENAI_TOOL_SCHEMAS if t["function"]["name"] in allowed]
|
return [t for t in OPENAI_TOOL_SCHEMAS if t["function"]["name"] in allowed]
|
||||||
|
|
||||||
|
|
||||||
|
# ── Keyword-based tool routing ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Maps classifier category names → tool names in that category
|
||||||
|
CATEGORY_TOOL_MAP: dict[str, list[str]] = {
|
||||||
|
"web": ["web_search", "web_read", "http_fetch"],
|
||||||
|
"web_post": ["http_post"],
|
||||||
|
"file": ["project_file_read", "project_file_list", "file_stat", "file_grep",
|
||||||
|
"file_diff", "file_syntax_check", "file_read", "file_list", "file_write"],
|
||||||
|
"git": ["git_status", "git_log", "git_diff"],
|
||||||
|
"system": ["cortex_restart", "cortex_logs", "cortex_status", "cortex_update", "shell_exec"],
|
||||||
|
"tasks": ["task_list", "task_create", "task_update", "task_complete"],
|
||||||
|
"cron": ["cron_list", "cron_add", "cron_remove", "cron_toggle"],
|
||||||
|
"reminders": ["reminders_add", "reminders_list", "reminders_remove", "reminders_clear"],
|
||||||
|
"scratchpad": ["scratch_read", "scratch_write", "scratch_append", "scratch_clear"],
|
||||||
|
"ha": ["ha_get_state", "ha_get_states", "ha_call_service"],
|
||||||
|
"aether": ["ae_journal_list", "ae_journal_search", "ae_journal_entries_list",
|
||||||
|
"ae_journal_entry_read", "ae_journal_entry_create", "ae_journal_entry_update",
|
||||||
|
"ae_journal_entry_disable", "ae_journal_entry_append", "ae_journal_entry_prepend"],
|
||||||
|
"aether_db": ["ae_db_query", "ae_db_describe", "ae_db_show_view"],
|
||||||
|
"notifications":["web_push", "email_send", "nc_talk_send", "nc_talk_history"],
|
||||||
|
"agents": ["spawn_agent", "agent_status", "agent_list", "agent_cancel", "aider_run"],
|
||||||
|
"notes": ["agent_notes_read", "agent_notes_write", "agent_notes_append", "agent_notes_clear"],
|
||||||
|
"session": ["session_read", "session_search"],
|
||||||
|
"ae_tasks": ["ae_task_list"],
|
||||||
|
"claude": ["claude_allow_dir"],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Category name → trigger phrases. A category fires when ANY of its phrases
# occurs as a plain substring of the lower-cased message (no word-boundary
# check, so "log" also fires on "blog").
_KEYWORD_CATEGORY_MAP: dict[str, list[str]] = {
    "web": [
        "search", "look up", "what is", "who is", "weather", "forecast",
        "news", "find on", "google", "website", "article", "research",
        "temperature",
    ],
    "web_post": ["post to", "send to", "webhook", "trigger webhook"],
    "file": [
        "read file", "show file", "list file", "directory", "grep",
        "search in", "find in", "diff", "compare", "syntax check", "open file",
    ],
    "git": ["git", "commit", "branch", "pulled", "merged", "repository", "repo"],
    "system": [
        "restart", "update", "status", "logs", "log", "deploy", "run command",
        "shell", "is it running", "health",
    ],
    "tasks": [
        "task", "todo", "to-do", "to do", "add task", "create task",
        "pending", "what's on my list",
    ],
    "cron": [
        "schedule", "cron", "every day", "every week", "recurring",
        "automate", "job",
    ],
    "reminders": ["remind", "reminder", "don't forget"],
    "scratchpad": ["scratch", "scratchpad", "working note", "jot down", "notepad"],
    "ha": [
        "home assistant", "light", "thermostat", "turn on", "turn off",
        "switch", "sensor", "temperature in", "kitchen", "bedroom", "garage",
    ],
    "aether": [
        "journal", "aether journal", "note entry", "log entry",
        "search journal", "ae_journal",
    ],
    "aether_db": [
        "database", "query", "sql", "select", "db", "table",
        "schema", "maria", "run query",
    ],
    "notifications": [
        "notify", "push notification", "send email", "email",
        "talk message", "nextcloud",
    ],
    "agents": [
        "spawn", "sub-agent", "delegate", "spawn agent",
        "agent status", "agent list", "cancel agent", "background agent",
        "aider", "code change", "edit code", "make a change to", "fix the code",
    ],
    "notes": ["agent notes", "private notes", "my notes", "agent_notes"],
    "session": [
        "session", "history", "last time", "what did we", "earlier",
        "yesterday", "last week", "previously",
    ],
    "ae_tasks": ["ae task", "kanban", "board", "ae_task"],
    "claude": ["claude allow", "claude directory"],
}


def classify_tool_categories(message: str) -> list[str]:
    """List every category that has at least one trigger phrase in ``message``.

    Matching is a case-insensitive substring test. Categories come back in the
    order they are declared in ``_KEYWORD_CATEGORY_MAP``.

    An empty list means nothing matched — the caller can route the message as
    pure chat with zero tool overhead.
    """
    haystack = message.lower()
    hits: list[str] = []
    for category, phrases in _KEYWORD_CATEGORY_MAP.items():
        for phrase in phrases:
            if phrase in haystack:
                hits.append(category)
                break  # one hit is enough for this category
    return hits
|
||||||
|
|
||||||
|
|
||||||
|
def _message_content_text(content) -> str:
    """Flatten a chat message's ``content`` field into plain text for scanning.

    Strings are returned unchanged. A list is treated as content parts: the
    string ``"text"`` values of dict parts are joined with spaces. Anything
    else (``None``, numbers, unknown shapes) yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return " ".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def narrow_tools_by_keywords(
    message: str,
    role_tools: list[str] | None,
    context_messages: list[dict] | None = None,
) -> list[str]:
    """Narrow the active tool list to categories relevant to this message.

    The text that gets classified is ``message`` plus the most recent assistant
    message found in ``context_messages``. Including that turn catches
    follow-ups such as "yes, please do that", where the tool intent was voiced
    by the assistant and the user is only confirming.

    Args:
        message: The current user message.
        role_tools: Allow-list for the role. ``None`` means no restriction;
            an empty list means nothing is allowed.
        context_messages: Earlier chat messages, oldest first. Only the last
            one whose ``role`` is ``"assistant"`` is consulted. Its ``content``
            may be a string, ``None``, or a list of content parts.

    Returns:
        ``[]`` if no keywords matched (zero tool overhead). Otherwise the tools
        of every matched category, de-duplicated in first-seen order and
        intersected with ``role_tools`` when that is not ``None``.
    """
    scan_text = message or ""
    for prior in reversed(context_messages or []):
        if prior.get("role") == "assistant":
            # Content is not always a plain string; concatenating a list or
            # other object onto scan_text would raise TypeError.
            scan_text = scan_text + " " + _message_content_text(prior.get("content"))
            break  # only the latest assistant turn is considered

    matched = classify_tool_categories(scan_text)
    if not matched:
        return []

    # dict.fromkeys keeps first-seen order while dropping tools that appear
    # under more than one matched category.
    dynamic = list(dict.fromkeys(
        tool
        for category in matched
        for tool in CATEGORY_TOOL_MAP.get(category, [])
    ))

    if role_tools is not None:
        allowed = set(role_tools)
        dynamic = [tool for tool in dynamic if tool in allowed]

    return dynamic
|
||||||
|
|||||||
258
cortex/tools/aider.py
Normal file
258
cortex/tools/aider.py
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
"""
|
||||||
|
Aider coding agent tool — invokes Aider AI pair programming as a subprocess.
|
||||||
|
|
||||||
|
Aider handles repo-map generation, file editing, git commits, and linting automatically.
|
||||||
|
It works with any OpenAI-compatible model — point it at DeepSeek, Ollama, OpenRouter, etc.
|
||||||
|
via AIDER_MODEL / AIDER_OPENAI_API_BASE env vars or the project's .aider.conf.yml.
|
||||||
|
|
||||||
|
background=True runs the subprocess asynchronously and returns an agent_id immediately.
|
||||||
|
The caller can poll via agent_status() or request a push notification via notify=True.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from google.genai import types
|
||||||
|
|
||||||
|
import agent_manager
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# This module lives at <repo>/cortex/tools/aider.py, so the package directory
# is two levels above the file and the repository root is three. A single
# .parent would stop at cortex/tools/ and leave the "cortex" alias pointing at
# cortex/ instead of the repository root.
_CORTEX_DIR = Path(__file__).resolve().parent.parent  # .../Cortex_and_Inara_dev/cortex/
_PROJECT_ROOT = _CORTEX_DIR.parent                    # .../Cortex_and_Inara_dev/

# Known project aliases — expanded (including "~") before being used as the
# subprocess working directory.
_PROJECT_ALIASES: dict[str, str] = {
    "cortex": str(_PROJECT_ROOT),
    "aether_api": "~/OSIT_dev/aether_api_fastapi",
    "aether_frontend": "~/OSIT_dev/aether_app_sveltekit",
    "aether_container": "~/OSIT_dev/aether_container_env",
}

# Upper bound on the characters of combined Aider output returned to the caller.
_MAX_OUTPUT_CHARS = 12_000
|
||||||
|
|
||||||
|
|
||||||
|
def _format_aider_output(stdout: bytes, stderr: bytes, returncode: int | None) -> str:
    """Merge Aider's captured streams into one size-capped report string."""
    out = stdout.decode(errors="replace").strip()
    err = stderr.decode(errors="replace").strip()

    parts = []
    if out:
        parts.append(out)
    if err:
        parts.append(f"[stderr]\n{err}")
    combined = "\n".join(parts) if parts else "(no output)"

    # Keep the head and the tail; the middle of a long run is dropped.
    if len(combined) > _MAX_OUTPUT_CHARS:
        half = _MAX_OUTPUT_CHARS // 2
        combined = (
            combined[:half]
            + f"\n\n[... {len(combined) - _MAX_OUTPUT_CHARS} chars trimmed ...]\n\n"
            + combined[-half:]
        )

    # Exit codes 0 and 1 are returned without a prefix; any other code is flagged.
    if returncode not in (0, 1):
        return f"[exit {returncode}]\n{combined}"
    return combined


async def _kill_if_running(proc) -> None:
    """Kill and reap ``proc`` if it has not exited yet; otherwise do nothing."""
    if proc.returncode is not None:
        return
    try:
        proc.kill()
    except ProcessLookupError:
        pass  # exited between the returncode check and the kill
    await proc.wait()


async def aider_run(
    project: str,
    task: str,
    files: list[str] | None = None,
    model: str | None = None,
    auto_commit: bool = True,
    timeout: int = 300,
    background: bool = False,
    notify: bool = False,
) -> str:
    """Run Aider with a single task in a project directory, then exit.

    When background=True, fires the subprocess asynchronously and returns an agent_id
    immediately. Use agent_status(agent_id) to check progress; set notify=True to
    receive a push/Talk notification on completion.

    Args:
        project: A key of ``_PROJECT_ALIASES`` or a directory path ("~" is expanded).
        task: Instruction text handed to Aider via ``--message``.
        files: Optional paths, each passed to Aider as ``--file``.
        model: Optional model name passed as ``--model``.
        auto_commit: Selects ``--auto-commits`` or ``--no-auto-commits``.
        timeout: Seconds to wait for Aider; clamped to the range 10..600.
        background: Run as an asyncio task registered with ``agent_manager``.
        notify: Forwarded to ``agent_manager.register`` for background runs.

    Returns:
        Foreground: Aider's combined stdout/stderr (trimmed to
        ``_MAX_OUTPUT_CHARS``), or a string starting with "Error:" on failure.
        Background: a short message containing the agent id.

    If the wait times out or the awaiting task is cancelled, the Aider
    subprocess is killed rather than left running unattended.
    """
    resolved = _PROJECT_ALIASES.get(project, project)
    cwd = Path(os.path.expanduser(resolved))

    if not cwd.is_dir():
        return f"Error: project directory '{resolved}' does not exist."

    timeout = min(max(int(timeout), 10), 600)

    cmd: list[str] = [
        "aider",
        "--message", task,
        "--yes-always",
        "--no-pretty",
        "--no-stream",
        "--no-check-update",
        "--no-detect-urls",
        "--auto-commits" if auto_commit else "--no-auto-commits",
    ]

    # Inject OpenRouter credentials from the Cortex model registry if available.
    # Aider's subprocess inherits Cortex's environment, which doesn't include keys
    # stored in ~/.env or shell profiles. Pulling from the registry keeps it self-contained.
    user = "scott"  # fallback owner when the current user cannot be resolved
    try:
        import model_registry
        from persona import get_user

        user = get_user() or user
        registry = model_registry.get_registry(user)
        or_host = next(
            (h for h in registry.get("hosts", []) if "openrouter.ai" in h.get("api_url", "")),
            None,
        )
        if or_host and or_host.get("api_key"):
            # NOTE(review): the key travels on the command line and is visible
            # in process listings on this host. Aider also reads provider keys
            # from the environment — consider switching after confirming how
            # that interacts with project .env files.
            cmd += ["--api-key", f"openrouter={or_host['api_key']}"]
    except Exception:
        # Non-fatal — user may have key via env or .aider.conf.yml. A user name
        # resolved before the failure is kept.
        logger.debug("aider_run: OpenRouter key lookup skipped", exc_info=True)

    if model:
        cmd += ["--model", model]

    for f in (files or []):
        cmd += ["--file", f]

    logger.info(
        "aider_run: project=%s model=%s auto_commit=%s files=%s background=%s task=%.120s",
        project, model, auto_commit, files, background, task,
    )

    async def _run() -> str:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=str(cwd),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=float(timeout))
        finally:
            # wait_for() only cancels communicate(); it does not stop the child.
            # Without this, a timed-out or cancelled run would leave Aider
            # running and still able to edit and commit. No-op on the normal path.
            await _kill_if_running(proc)
        return _format_aider_output(stdout, stderr, proc.returncode)

    if background:
        rec = await agent_manager.register(
            user=user,
            role="aider",
            task=task,
            level=2,
            notify=notify,
        )

        async def _bg_task() -> None:
            try:
                result = await _run()
                await agent_manager.finish(rec.agent_id, result, "done")
                logger.info("aider_run [bg]: done %s", rec.agent_id[:8])
            except asyncio.CancelledError:
                await agent_manager.finish(rec.agent_id, "Cancelled.", "cancelled")
                raise
            except asyncio.TimeoutError:
                msg = f"Aider timed out after {timeout}s"
                logger.warning("aider_run [bg]: timeout %s", rec.agent_id[:8])
                await agent_manager.finish(rec.agent_id, msg, "timeout")
            except FileNotFoundError:
                msg = "Error: 'aider' not found in PATH — run: pip install aider-chat"
                await agent_manager.finish(rec.agent_id, msg, "failed")
            except Exception as e:
                logger.error("aider_run [bg]: failed %s: %s", rec.agent_id[:8], e)
                await agent_manager.finish(rec.agent_id, str(e), "failed")

        bg = asyncio.create_task(_bg_task())
        # Hand the task to agent_manager so it keeps a reference to it.
        agent_manager.set_task_ref(rec.agent_id, bg)
        return (
            f"Aider task started in background. ID: {rec.agent_id}\n"
            f"Use agent_status('{rec.agent_id}') to monitor progress."
        )

    # Synchronous path
    try:
        return await _run()
    except asyncio.TimeoutError:
        return f"Error: aider timed out after {timeout}s"
    except FileNotFoundError:
        return "Error: 'aider' not found in PATH — run: pip install aider-chat"
    except Exception as e:
        logger.error("aider_run error: %s", e)
        return f"Error: {e}"
|
||||||
|
|
||||||
|
|
||||||
|
# Tool-level description shown to the model for aider_run.
_AIDER_RUN_DESCRIPTION = (
    "Run the Aider AI coding agent on a project with a single task, then exit. "
    "Aider maps the repo, edits files, runs lint checks, and optionally commits. "
    "Use for code changes, bug fixes, refactoring, or new features across any "
    "configured project. Model is set via AIDER_MODEL env var or .aider.conf.yml "
    "in the project directory — no API key needed if the project is already configured. "
    "Set background=True for long tasks — returns an agent_id immediately and sends "
    "a notification when done. ADMIN ONLY. Requires confirmation."
)

# One schema per aider_run argument, keyed by parameter name.
_AIDER_RUN_PROPERTIES = {
    "project": types.Schema(
        type=types.Type.STRING,
        description=(
            "Project alias or absolute path. Known aliases: "
            "'cortex' (this project), 'aether_api', 'aether_frontend', "
            "'aether_container'. Or provide an absolute path like "
            "'/home/scott/OSIT_dev/aether_api_fastapi'."
        ),
    ),
    "task": types.Schema(
        type=types.Type.STRING,
        description=(
            "Full task description sent to Aider as --message. "
            "Be specific — include file names, what to change, and why. "
            "Example: 'In cortex/tools/web.py, add a max_chars parameter "
            "to web_read() capped at 32768.'"
        ),
    ),
    "files": types.Schema(
        type=types.Type.ARRAY,
        items=types.Schema(type=types.Type.STRING),
        description=(
            "Optional list of files to add explicitly to the editing context "
            "(paths relative to the project root). "
            "Aider also builds a repo map automatically — these get priority."
        ),
    ),
    "model": types.Schema(
        type=types.Type.STRING,
        description=(
            "Optional model override. Examples: 'deepseek/deepseek-chat', "
            "'openrouter/anthropic/claude-3-5-haiku-20241022'. "
            "Defaults to the project's .aider.conf.yml model or AIDER_MODEL env var."
        ),
    ),
    "auto_commit": types.Schema(
        type=types.Type.BOOLEAN,
        description=(
            "Auto-commit changes after edits (default: true). "
            "Set to false to review diffs before committing manually."
        ),
    ),
    "timeout": types.Schema(
        type=types.Type.INTEGER,
        description="Max seconds to wait for Aider to finish (default 300, max 600).",
    ),
    "background": types.Schema(
        type=types.Type.BOOLEAN,
        description=(
            "Run asynchronously in the background (default: false). "
            "Returns an agent_id immediately; use agent_status(agent_id) to monitor. "
            "Recommended for tasks expected to take more than ~60 seconds."
        ),
    ),
    "notify": types.Schema(
        type=types.Type.BOOLEAN,
        description=(
            "Send a push/Talk notification when the background task completes "
            "(default: false). Only applies when background=true."
        ),
    ),
}

# Function declarations exported by this tool module (aider_run only).
DECLARATIONS = [
    types.FunctionDeclaration(
        name="aider_run",
        description=_AIDER_RUN_DESCRIPTION,
        parameters=types.Schema(
            type=types.Type.OBJECT,
            properties=_AIDER_RUN_PROPERTIES,
            required=["project", "task"],
        ),
    )
]
|
||||||
362
documentation/PLAN__Tool_Schema_Optimization.md
Normal file
362
documentation/PLAN__Tool_Schema_Optimization.md
Normal file
@@ -0,0 +1,362 @@
|
|||||||
|
# PLAN — Reduce Tool Schema Overhead in Cortex
|
||||||
|
|
||||||
|
**Goal:** Eliminate the per-round, per-message transmission of all 45 tool definitions.
|
||||||
|
Drop overhead from ~8K-10K tokens per round to near zero for casual chat, and to a
|
||||||
|
relevant subset for orchestrated work.
|
||||||
|
|
||||||
|
**Status:** Draft — ready for Claude Code implementation.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Background
|
||||||
|
|
||||||
|
Every orchestrated (⚡ toggled on) message triggers a ReAct tool loop. The full 45-tool
|
||||||
|
schema is rebuilt and transmitted **on every round of every call** — including rounds
|
||||||
|
where no tool is invoked and messages where no tool is needed at all. This wastes
|
||||||
|
thousands of tokens per interaction.
|
||||||
|
|
||||||
|
The architecture already has the building blocks for a fix: role configs support a
|
||||||
|
`tools` allow-list, and `get_openai_tools_for_role()` already accepts filtering
|
||||||
|
parameters. They're just not being wired together effectively.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1 — Role-Based Tool Filtering (Foundation)
|
||||||
|
|
||||||
|
**Effort:** Small. **Impact:** High.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
Define which tools each role actually needs, then enforce the filtering so roles
|
||||||
|
only receive their relevant tool subset.
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
**1. Audit every role and define tool lists.**
|
||||||
|
|
||||||
|
| Role | Tools needed | Approx count |
|
||||||
|
|------|-------------|-------------|
|
||||||
|
| `chat` | None (zero tools — should never be in the orchestration loop) | 0 |
|
||||||
|
| `orchestrator` | web, file (admin), shell (admin), tasks, cron, reminders, scratchpad, Aether journals, agent notes, system (admin), spawn_agent, HA, ae_db, git, file_diff, file_syntax_check, notifications (admin) | 25-30 |
|
||||||
|
| `distill` | None (pure text processing) | 0 |
|
||||||
|
| `coder` | file (admin), shell (admin), git, file_diff, file_syntax_check | 8-10 |
|
||||||
|
| `research` | web_search, web_read, http_fetch | 3 |
|
||||||
|
| `admin` (role) | All 45 (admin-level access) | 45 |
|
||||||
|
|
||||||
|
**2. Store tool lists per role in `config.yaml` or the model registry defaults.**
|
||||||
|
The role config already has a `tools` field — populate it with the lists above.
|
||||||
|
|
||||||
|
**3. Enforce in `get_openai_tools_for_role()`.**
|
||||||
|
The function is called from `openai_orchestrator.py` around line 451. Currently if
|
||||||
|
`tools` is empty/missing it returns all tools. Change so that:
|
||||||
|
|
||||||
|
- If role config has a `tools` list → return only those tools
|
||||||
|
- If role config has `tools: false` → return empty list
|
||||||
|
- If role config has no `tools` field → return all (backward compat)
|
||||||
|
|
||||||
|
At the call site (`_run_from_messages`), pass the role's tool allow-list into
|
||||||
|
`get_openai_tools_for_role()` via the `tool_list` parameter that already exists.
|
||||||
|
|
||||||
|
### Files to change
|
||||||
|
|
||||||
|
- `cortex/openai_orchestrator.py` — wire role config `tools` into the call to
|
||||||
|
`get_openai_tools_for_role()`
|
||||||
|
- `cortex/model_registry.py` — ensure `get_role_config()` returns the `tools` field
|
||||||
|
(it does already, line 487)
|
||||||
|
- `cortex/config.py` or `home/{user}/model_registry.json` — define the tool lists
|
||||||
|
per default role
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2 — Dynamic Keyword-Based Tool Routing (High Impact)
|
||||||
|
|
||||||
|
**Effort:** Small. **Impact:** Very High.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
Before entering the ReAct tool loop, scan the user's message with a lightweight
|
||||||
|
keyword classifier to determine which tool categories are relevant. Only include
|
||||||
|
tools from matched categories — typically 3-8 tools instead of 45.
|
||||||
|
|
||||||
|
This is the **core optimization.** For the 80%+ of messages that only need a narrow
|
||||||
|
set of tools (or none at all), this eliminates the bulk of schema overhead on every
|
||||||
|
round.
|
||||||
|
|
||||||
|
### The Hybrid Stack
|
||||||
|
|
||||||
|
```
|
||||||
|
User message
|
||||||
|
↓
|
||||||
|
[1] Role filter (Phase 1) — narrows 45 tools → ~25 for orchestrator role
|
||||||
|
↓
|
||||||
|
[2] Keyword classifier (Phase 2) — narrows ~25 → 3-8 relevant tools
|
||||||
|
↓
|
||||||
|
[3] ReAct loop — only transmitting the relevant subset each round
|
||||||
|
```
|
||||||
|
|
||||||
|
If the keyword classifier matches nothing (e.g. "good morning", "test", "what do you
|
||||||
|
think?"), it returns an empty tool set — effectively routing the message as a pure
|
||||||
|
chat interaction with zero tool overhead.
|
||||||
|
|
||||||
|
### Keyword Category Map
|
||||||
|
|
||||||
|
Each category maps keywords → tool names. Simple regex/contains matching.
|
||||||
|
|
||||||
|
| Category | Trigger keywords | Tools included |
|
||||||
|
|----------|-----------------|---------------|
|
||||||
|
| `web` | search, google, look up, what is, who is, weather, forecast, temperature, news, article, website, find, research | web_search, web_read, http_fetch |
|
||||||
|
| `web_post` | post to, send to, webhook, trigger, notify | http_post |
|
||||||
|
| `file` | read file, show file, open file, list files, directory, grep, find in, search in, diff, compare, syntax check | file_read, file_list, file_write, file_diff, file_grep, file_syntax_check, file_stat |
|
||||||
|
| `git` | git, commit, branch, pushed, pulled, merge, repo, repository | git_status, git_log, git_diff |
|
||||||
|
| `system` | restart, update, status, logs, deploy, shell, command, run, health, is it running | cortex_status, cortex_logs, cortex_restart, cortex_update, shell_exec |
|
||||||
|
| `tasks` | task, todo, to-do, to do, add task, create task, what's on my list, pending | task_list, task_create, task_update, task_complete |
|
||||||
|
| `cron` | schedule, cron, every day, every week, recurring, automate, job | cron_list, cron_add, cron_remove, cron_toggle |
|
||||||
|
| `reminders` | remind, reminder, remember, don't forget | reminders_add, reminders_list, reminders_remove, reminders_clear |
|
||||||
|
| `scratchpad` | scratch, scratchpad, working notes, jot down, notepad | scratch_read, scratch_write, scratch_append, scratch_clear |
|
||||||
|
| `ha` | home assistant, light, thermostat, turn on, turn off, kitchen, bedroom, switch, sensor, temperature | ha_get_state, ha_get_states, ha_call_service |
|
||||||
|
| `aether` | journal, aether, note entry, log entry, search journals, ae_ | ae_journal_list, ae_journal_search, ae_journal_entry_read, ae_journal_entries_list, ae_journal_entry_create, ae_journal_entry_update, ae_journal_entry_disable, ae_journal_entry_append, ae_journal_entry_prepend |
|
||||||
|
| `aether_db` | database, query, sql, select, db, table, schema, maria | ae_db_query, ae_db_describe, ae_db_show_view |
|
||||||
|
| `notifications` | notify, push, send email, email, message, talk, nextcloud | web_push, email_send, nc_talk_send, nc_talk_history |
|
||||||
|
| `agents` | spawn, sub-agent, delegate, agent | spawn_agent |
|
||||||
|
| `notes` | agent notes, private notes, my notes | agent_notes_read, agent_notes_write, agent_notes_append, agent_notes_clear |
|
||||||
|
| `session` | remember, session, history, last time, what did we, earlier, yesterday, last week | session_read, session_search |
|
||||||
|
| `ae_tasks` | ae task, kanban, board | ae_task_list |
|
||||||
|
| `claude` | claude, allow directory, permissions | claude_allow_dir |
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
In `openai_orchestrator.py`, before the ReAct loop starts:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def _classify_tool_categories(user_message: str) -> list[str]:
|
||||||
|
"""Classify a user message into tool categories based on keywords.
|
||||||
|
|
||||||
|
Returns a list of category names whose tools should be included.
|
||||||
|
Returns empty list if no categories match (pure chat).
|
||||||
|
"""
|
||||||
|
message_lower = user_message.lower()
|
||||||
|
|
||||||
|
category_keywords = {
|
||||||
|
"web": ["search", "look up", "what is", "who is", "weather",
|
||||||
|
"forecast", "news", "find on", "google", "website",
|
||||||
|
"article", "research", "temperature"],
|
||||||
|
"web_post": ["post to", "send to", "webhook", "trigger webhook"],
|
||||||
|
"file": ["read file", "show file", "list file", "directory",
|
||||||
|
"grep", "search in", "find in", "diff", "compare",
|
||||||
|
"syntax check", "open file"],
|
||||||
|
"git": ["git", "commit", "branch", "pulled", "merged",
|
||||||
|
"repository", "repo"],
|
||||||
|
"system": ["restart", "update", "status", "logs", "deploy",
|
||||||
|
"run command", "shell", "is it running", "health"],
|
||||||
|
"tasks": ["task", "todo", "to-do", "to do", "add task",
|
||||||
|
"create task", "pending", "what's on my list"],
|
||||||
|
"cron": ["schedule", "cron", "every day", "every week",
|
||||||
|
"recurring", "automate", "job"],
|
||||||
|
"reminders": ["remind", "reminder", "remember", "don't forget"],
|
||||||
|
"scratchpad": ["scratch", "scratchpad", "working note", "jot down",
|
||||||
|
"notepad"],
|
||||||
|
"ha": ["home assistant", "light", "thermostat", "turn on",
|
||||||
|
"turn off", "switch", "sensor", "temperature in",
|
||||||
|
"kitchen", "bedroom", "garage"],
|
||||||
|
"aether": ["journal", "aether journal", "note entry", "log entry",
|
||||||
|
"search journal", "ae_journal"],
|
||||||
|
"aether_db": ["database", "query", "sql", "select", "db", "table",
|
||||||
|
"schema", "maria", "run query"],
|
||||||
|
"notifications":["notify", "push notification", "send email", "email",
|
||||||
|
"talk message", "nextcloud"],
|
||||||
|
"agents": ["spawn", "sub-agent", "delegate", "spawn agent"],
|
||||||
|
"notes": ["agent notes", "private notes", "my notes",
|
||||||
|
"agent_notes"],
|
||||||
|
"session": ["remember", "session", "history", "last time",
|
||||||
|
"what did we", "earlier", "yesterday", "last week",
|
||||||
|
"previously"],
|
||||||
|
"ae_tasks": ["ae task", "kanban", "board", "ae_task"],
|
||||||
|
"claude": ["claude allow", "claude directory"],
|
||||||
|
}
|
||||||
|
|
||||||
|
matched = []
|
||||||
|
for category, keywords in category_keywords.items():
|
||||||
|
if any(kw in message_lower for kw in keywords):
|
||||||
|
matched.append(category)
|
||||||
|
|
||||||
|
return matched
|
||||||
|
```
|
||||||
|
|
||||||
|
Then at the orchestration entry point, after determining the role's base tool list
|
||||||
|
(Phase 1), apply the keyword filter:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Phase 1: Get role's base tool list
|
||||||
|
role_tools = get_role_config(username, role).get("tools")
|
||||||
|
|
||||||
|
# Phase 2: Dynamically narrow based on message content
|
||||||
|
matched_categories = _classify_tool_categories(user_message)
|
||||||
|
if matched_categories:
|
||||||
|
category_tool_map = { ... } # defined at module level
|
||||||
|
dynamic_tools = []
|
||||||
|
for cat in matched_categories:
|
||||||
|
dynamic_tools.extend(category_tool_map.get(cat, []))
|
||||||
|
# Intersect with role_tools so we never grant more than the role allows (NOTE: an empty intersection must stay empty — passing None would fall back to all tools)
|
||||||
|
if role_tools:
|
||||||
|
dynamic_tools = [t for t in dynamic_tools if t in role_tools]
|
||||||
|
active_tools = get_openai_tools_for_role(
|
||||||
|
role=user_role,
|
||||||
|
tool_list=dynamic_tools or None
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# No keywords matched — likely casual chat; route to /chat
|
||||||
|
# or use empty tool list
|
||||||
|
active_tools = []
|
||||||
|
```
|
||||||
|
|
||||||
|
### Edge Cases to Handle
|
||||||
|
|
||||||
|
1. **Multiple categories match:** Union all matched tool sets. The `for cat in matched_categories` loop handles this naturally.
|
||||||
|
|
||||||
|
2. **No categories match:** Return empty tool set. The orchestrator loop won't start — this effectively becomes a chat message without incurring the schema tax. If the LLM needs tools anyway, it will respond with a natural language request, and the user can rephrase.
|
||||||
|
|
||||||
|
3. **Ambiguous short messages:** "Hey can you check something" — matches nothing, falls through to empty tools. This is correct behavior; the LLM will ask "what do you want me to check?" and the next message will have a clear intent.
|
||||||
|
|
||||||
|
4. **Over-broad keywords:** "search" in "search journals" could trigger both `web` and `aether`. The union handles this — both categories' tools are included, which is what you want.
|
||||||
|
|
||||||
|
### File to change
|
||||||
|
|
||||||
|
- `cortex/openai_orchestrator.py` — add `_classify_tool_categories()` function and
|
||||||
|
wire it into the orchestration entry point before the ReAct loop
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3 — Cache Tool Schema per Session
|
||||||
|
|
||||||
|
**Effort:** Medium. **Impact:** Medium.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
The tool schema doesn't change between rounds of the same session for a given role.
|
||||||
|
After Phase 2 narrows it to, say, 5 tools, those 5 tool definitions are identical
|
||||||
|
every round. Cache them.
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
Add a session-scoped cache in `openai_orchestrator.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Module-level cache: key = f"{session_id}:{role}:{sorted_tool_list}"
|
||||||
|
_tool_schema_cache: dict[str, list[dict]] = {}
|
||||||
|
|
||||||
|
def _get_cached_tool_schema(session_id: str, role: str, tool_list: list[str] | None) -> list[dict]:
|
||||||
|
key = f"{session_id}:{role}:{sorted(tool_list) if tool_list else 'all'}"
|
||||||
|
if key in _tool_schema_cache:
|
||||||
|
return _tool_schema_cache[key]
|
||||||
|
schemas = get_openai_tools_for_role(role=role, tool_list=tool_list)
|
||||||
|
_tool_schema_cache[key] = schemas
|
||||||
|
return schemas
|
||||||
|
```
|
||||||
|
|
||||||
|
Invalidation: Cache key includes the tool list, so if the dynamic classifier returns
|
||||||
|
different categories on the next message, it gets a fresh cache entry. No explicit
|
||||||
|
invalidation needed.
|
||||||
|
|
||||||
|
### File to change
|
||||||
|
|
||||||
|
- `cortex/openai_orchestrator.py` — add cache dict and lookup before calling
|
||||||
|
`get_openai_tools_for_role()`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4 — Reduce Default Max Rounds
|
||||||
|
|
||||||
|
**Effort:** Trivial. **Impact:** Low-to-medium.
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
Most requests resolve in 1-3 tool calls. A global cap of 10 means up to 7 wasted
|
||||||
|
schema transmissions on edge cases.
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
1. Make `max_rounds` configurable per model in the model registry (it already exists
|
||||||
|
in some model configs — see `home/brian/model_registry.json` line 42).
|
||||||
|
2. Read it from the model config during orchestration instead of using the global
|
||||||
|
`.env` value.
|
||||||
|
3. Lower the default from 10 to 5.
|
||||||
|
|
||||||
|
### Files to change
|
||||||
|
|
||||||
|
- `cortex/.env` — change `ORCHESTRATOR_MAX_ROUNDS=10` to `=5`
|
||||||
|
- `cortex/openai_orchestrator.py` — read per-model `max_rounds` from `model_cfg`
|
||||||
|
instead of only from settings
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5 — UI Improvements (Independent)
|
||||||
|
|
||||||
|
**Effort:** Small. **Impact:** Medium (UX).
|
||||||
|
|
||||||
|
### What
|
||||||
|
|
||||||
|
Make the tool mode indicator more obvious so the user can quickly tell whether
|
||||||
|
they're incurring the tool tax.
|
||||||
|
|
||||||
|
### Ideas
|
||||||
|
|
||||||
|
- Change ⚡ color: green when tools are on, gray when off
|
||||||
|
- Swap icon: ⚡ (tools) vs. 💬 (chat only)
|
||||||
|
- Add tooltip: "Tools enabled — all 45 tool schemas sent with each message"
|
||||||
|
- Optional: add a "Quick Question" button that sends to `/chat` directly, bypassing
|
||||||
|
the orchestrator entirely
|
||||||
|
|
||||||
|
### Files to change
|
||||||
|
|
||||||
|
- Svelte UI component — likely `ChatInput.svelte` or the chat mode toggle component
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recommended Execution Order
|
||||||
|
|
||||||
|
1. **Phase 1** (role filtering) — foundation. Defines the base tool set per role.
|
||||||
|
2. **Phase 2** (keyword routing) — **the big one.** Slashes 45 tools → 3-8 for the
|
||||||
|
vast majority of messages. Builds on Phase 1's role filtering.
|
||||||
|
3. **Phase 4** (lower max_rounds) — trivial change, do alongside Phase 2.
|
||||||
|
4. **Phase 3** (schema caching) — more involved, compounds savings from Phase 2.
|
||||||
|
5. **Phase 5** (UI) — independent UX polish, can be done any time.
|
||||||
|
|
||||||
|
### Quick Win Path (Recommended First Session)
|
||||||
|
|
||||||
|
Phases 1 + 2 + 4 can be done in a single Claude Code session. They're all in
|
||||||
|
`openai_orchestrator.py` and `model_registry.py` — the same few files. Estimated
|
||||||
|
effort: 45-60 minutes of coding.
|
||||||
|
|
||||||
|
Phase 3 (caching) is a separate, focused session afterward.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Appendix A: Code Locations (from grep audit 2026-05-15)
|
||||||
|
|
||||||
|
| What | File | Line |
|
||||||
|
|------|------|------|
|
||||||
|
| `get_openai_tools_for_role` definition | `cortex/tools.py` | ~540 |
|
||||||
|
| Call site (decides active_tools) | `cortex/openai_orchestrator.py` | ~449 |
|
||||||
|
| `_run_from_messages()` tool loop | `cortex/openai_orchestrator.py` | ~260 |
|
||||||
|
| Role config tools field | `cortex/model_registry.py` | ~487 |
|
||||||
|
| `get_role_config()` | `cortex/model_registry.py` | ~473 |
|
||||||
|
| `save_role_config()` (tools allow-list) | `cortex/model_registry.py` | ~455 |
|
||||||
|
| Global `ORCHESTRATOR_MAX_ROUNDS` | `cortex/.env` | 35 |
|
||||||
|
| `REQUIRED_ROLES` | `cortex/model_registry.py` | 163 |
|
||||||
|
| `DEFINED_ROLES` config | `cortex/config.py` | 80 |
|
||||||
|
| Per-model `max_rounds` example | `home/brian/model_registry.json` | 42 |
|
||||||
|
|
||||||
|
## Appendix B: Token Savings Estimate
|
||||||
|
|
||||||
|
| Scenario | Before (per round) | After Phase 1 | After Phase 1+2 | After All Phases |
|
||||||
|
|----------|-------------------|--------------|-----------------|-----------------|
|
||||||
|
| "What's the weather?" | ~9K tokens | ~5K (25 tools) | ~600 (3 web tools) | ~600 (cached) |
|
||||||
|
| "Good morning" | ~9K tokens | ~5K (25 tools) | 0 (routed to chat) | 0 |
|
||||||
|
| "Turn off kitchen lights" | ~9K tokens | ~5K (25 tools) | ~600 (3 HA tools) | ~600 (cached) |
|
||||||
|
| "Search journals for X" | ~9K tokens | ~5K (25 tools) | ~2K (10 aether tools) | ~2K (cached) |
|
||||||
|
| "Create a task" | ~9K tokens | ~5K (25 tools) | ~800 (4 task tools) | ~800 (cached) |
|
||||||
|
| "Run a SQL query" | ~9K tokens | ~5K (25 tools) | ~600 (3 db tools) | ~600 (cached) |
|
||||||
|
|
||||||
|
At 3 rounds per request and 50 requests/day, that's roughly **1.3M tokens/day of schema overhead today**
|
||||||
|
vs. **~13K/day after all optimizations** — a 99% reduction for casual chat, ~90% for
|
||||||
|
most tool-using queries.
|
||||||
Reference in New Issue
Block a user