feat: OPERATIONS.md bootstrap doc + load at T2+; patch stale persona files

- New home/scott/persona/inara/OPERATIONS.md: self-maintenance workflow (cortex_update → review → cortex_restart), access control table, key paths, memory file map, distillation cadence, channel/architecture notes
- context_loader.py: load OPERATIONS.md at Tier 2+ after PROTOCOLS.md
- TOOLS.md: count 39→40, add web_push to Notifications section
- PROTOCOLS.md: replace stale 10-tool list with reference to TOOLS.md
- CONTEXT_TIERS.md: fix memory file names (MEMORY.md → LONG/MID/SHORT), update Tier 2 load list, fix Hard Rules credentials note

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
feat: context budget enforcement + compaction in OpenAI orchestrator
2026-05-05 22:21:03 -04:00 · 2026-05-05 22:01:54 -04:00 · 2026-05-05 20:42:32 -04:00
9 changed files with 135 additions and 13 deletions
--- a/cortex/context_loader.py
+++ b/cortex/context_loader.py
@@ -65,6 +65,10 @@ def load_context(
    if proto_path.exists():
        parts.append(f"--- PROTOCOLS.md ---\n{proto_path.read_text()}")
    ops_path = inara_dir / "OPERATIONS.md"
    if ops_path.exists():
        parts.append(f"--- OPERATIONS.md ---\n{ops_path.read_text()}")
    # Global tool reference (same for all personas)
    tools_path = _STATIC_DIR / "TOOLS.md"
    if tools_path.exists():
--- a/cortex/openai_orchestrator.py
+++ b/cortex/openai_orchestrator.py
@@ -26,6 +26,7 @@ from openai import AsyncOpenAI
 from config import settings
 from orchestrator_engine import OrchestrateCheckpoint, OrchestratorResult
 from tools import OPENAI_TOOL_SCHEMAS, call_tool, get_openai_tools_for_role, get_tools_for_role, CONFIRM_REQUIRED
 import tool_audit
 logger = logging.getLogger(__name__)
@@ -73,6 +74,7 @@ async def run(
    effective_confirm = (CONFIRM_REQUIRED - set(_confirm_allow)) | set(_confirm_deny)
    client, model_name, active_tools = _build_client(model_cfg, user_role, tool_list)
    tool_audit.set_context("openai", model_cfg.get("label") or model_name)
    sys_content = (system_prompt or "") + _TOOL_INSTRUCTION
    messages: list[dict] = [{"role": "system", "content": sys_content}]
@@ -188,6 +190,66 @@ async def resume(checkpoint: OrchestrateCheckpoint, confirmed: bool) -> Orchestr
    )
 # Rough chars-per-token ratio used when estimating prompt size
 _CHARS_PER_TOKEN = 4
 # Fixed token overhead budget for sending 40 tool schemas per call
 _TOOL_SCHEMA_OVERHEAD = 3_000
 # Chars to keep per truncated old tool result
 _TRUNC_RESULT_CHARS = 400
 # Always keep the last N tool-result messages uncompacted
 _KEEP_RECENT_TOOL_MSGS = 6  # ~2 rounds of 3 tools each
 def _estimate_tokens(messages: list[dict]) -> int:
    """
    Estimate the prompt size of *messages* in tokens.
    Each message is measured by the length of its JSON serialisation, the sum
    is divided by _CHARS_PER_TOKEN, and _TOOL_SCHEMA_OVERHEAD is added on top.
    """
    total = sum(len(json.dumps(m)) for m in messages)
    return total // _CHARS_PER_TOKEN + _TOOL_SCHEMA_OVERHEAD
 def _truncate_tool_content(content: object) -> tuple[object, int]:
    """
    Shorten one old tool result.
    Returns (content, chars_saved). The content comes back unchanged, with
    chars_saved == 0, when it is not a string, when it already carries the
    omission marker from an earlier pass, or when truncating would not make
    it shorter.
    """
    if not isinstance(content, str):
        # None or structured content: there is nothing safe to slice.
        return content, 0
    tail = content[_TRUNC_RESULT_CHARS:]
    if tail.startswith(" …[") and tail.endswith(" chars omitted]"):
        # Already compacted in a previous round. Truncating again would
        # overwrite the original omitted-count with the marker's own length.
        return content, 0
    marker = f" …[{len(tail)} chars omitted]"
    if len(tail) <= len(marker):
        # The marker is at least as long as what it would replace.
        return content, 0
    return content[:_TRUNC_RESULT_CHARS] + marker, len(tail) - len(marker)
 def _compact_messages(messages: list[dict], budget_tokens: int) -> list[dict]:
    """
    Truncate old tool result content once the context budget is exceeded.
    Only messages with role "tool" are candidates, and the most recent
    _KEEP_RECENT_TOOL_MSGS of them are always left intact. Every other message
    (system, user, assistant) is passed through untouched. Older tool results
    are cut to _TRUNC_RESULT_CHARS characters plus an omission marker.
    The function is idempotent: results compacted on an earlier round are
    recognised by their marker and left alone, so it is safe to call on every
    round. Input dicts are never mutated; changed messages are shallow copies.
    Returns the original list object when the estimate is within budget or
    when nothing could be trimmed, otherwise a new list.
    """
    if _estimate_tokens(messages) <= budget_tokens:
        return messages
    tool_indices = [i for i, m in enumerate(messages) if m.get("role") == "tool"]
    n_to_compact = max(0, len(tool_indices) - _KEEP_RECENT_TOOL_MSGS)
    if n_to_compact == 0:
        return messages  # nothing old enough to trim
    compact_set = set(tool_indices[:n_to_compact])
    result = []
    chars_saved = 0
    for i, msg in enumerate(messages):
        if i in compact_set:
            new_content, saved = _truncate_tool_content(msg.get("content"))
            if saved:
                # Copy so the caller's original message dict stays intact.
                msg = {**msg, "content": new_content}
                chars_saved += saved
        result.append(msg)
    if not chars_saved:
        return messages  # every old result was already compact
    new_est = _estimate_tokens(result)
    logger.info(
        "context compaction: saved ~%d tokens (%d chars), now ~%d / %d tokens",
        chars_saved // _CHARS_PER_TOKEN, chars_saved, new_est, budget_tokens,
    )
    return result
 def _context_budget(model_cfg: dict | None) -> int:
    """Return the usable token budget (75% of context window, min 16k, default 32k)."""
    context_k = (model_cfg or {}).get("context_k") or 32
    return max(16_000, int(context_k * 1000 * 0.75))
 async def _run_from_messages(
    client,
    messages: list[dict],
@@ -209,10 +271,13 @@ async def _run_from_messages(
    Returns (final_response, checkpoint) — checkpoint is set if confirmation is needed.
    """
    final_response = ""
    budget = _context_budget(model_cfg)
    for round_num in range(starting_round, settings.orchestrator_max_rounds):
-        logger.info("OpenAI orchestrator round %d / %d  model=%s",
+        messages = _compact_messages(messages, budget)
-                    round_num + 1, settings.orchestrator_max_rounds, model_name)
+        est = _estimate_tokens(messages)
        logger.info("OpenAI orchestrator round %d / %d  model=%s  ~%d tokens",
                    round_num + 1, settings.orchestrator_max_rounds, model_name, est)
        response = await client.chat.completions.create(
            model=model_name,
@@ -238,7 +303,8 @@ async def _run_from_messages(
            ]
        messages.append(assistant_msg)
-        if choice.finish_reason == "tool_calls" and msg.tool_calls:
+        # Some models set finish_reason="stop" even when tool_calls are present
        if msg.tool_calls and (choice.finish_reason in ("tool_calls", "stop", None)):
            # Snapshot state before tool responses for potential checkpoint
            pre_fn_state = list(messages)
@@ -247,10 +313,14 @@ async def _run_from_messages(
            for tc in msg.tool_calls:
                name = tc.function.name
                raw_args = tc.function.arguments or "{}"
                try:
-                    args_parsed = json.loads(tc.function.arguments)
+                    args_parsed = json.loads(raw_args)
-                except json.JSONDecodeError:
+                    if not isinstance(args_parsed, dict):
-                    args_parsed = {"raw": tc.function.arguments}
+                        raise ValueError("args must be a JSON object")
                except (json.JSONDecodeError, ValueError) as e:
                    logger.warning("Malformed tool args for %s: %s — args: %.200s", name, e, raw_args)
                    args_parsed = {}
                if name in effective_confirm:
                    pending_tools.append({"name": name, "args": args_parsed, "tool_call_id": tc.id})
--- a/cortex/orchestrator_engine.py
+++ b/cortex/orchestrator_engine.py
@@ -27,6 +27,7 @@ from config import settings
 from llm_client import complete
 from tools import TOOL_DECLARATIONS, call_tool, get_tools_for_role, CONFIRM_REQUIRED
 import usage_tracker
 import tool_audit
 from persona import _user
 logger = logging.getLogger(__name__)
@@ -140,6 +141,7 @@ async def run(
        )
    client = genai.Client(api_key=api_key)
    tool_audit.set_context("gemini", model_name or settings.orchestrator_model)
    _confirm_allow = frozenset(confirm_allow or ())
    _confirm_deny = frozenset(confirm_deny or ())
--- a/cortex/static/HELP.md
+++ b/cortex/static/HELP.md
@@ -6,7 +6,7 @@
     and are appended automatically by help.html when present.
 -->
-*Last updated: 2026-04-30*
+*Last updated: 2026-05-05*
 ---
@@ -16,7 +16,7 @@
 |---|---|
 | **Sessions** | Open the sessions panel — list, resume, or start sessions |
 | **N** (sliders icon) | Open the Context & Memory panel (N = current context tier) |
-| **☰** | Settings menu — Files, Account, Sign Out |
+| **☰** | Settings menu — Files, push notification toggle, Account, Sign Out |
 | **?** | Open this help panel |
 The **Context & Memory** panel (sliders icon with tier number) contains all configuration options:
@@ -59,7 +59,7 @@ The orchestrator runs a multi-step tool loop:
 The ⚡ toggle is **independent of the Role selector** — you can use any role (chat, coder, research, etc.) with or without tools. The orchestrator model is configured in **Account → Model Registry → Role Assignments → Orchestrator**. By default this is Gemini API.
-The full tool reference is in the **Tools** tab. 30 tools across web, files, shell, system, tasks, cron, reminders, scratchpad, notifications, and Aether Journals.
+The full tool reference is in the **Tools** tab. 40 tools across web, files, shell, system, tasks, cron, reminders, scratchpad, notifications, and Aether Journals.
 Tools mode is best for tasks requiring research, multi-step reasoning, or side effects (e.g. "search for X", "add a task", "what's on my list?", "append this to my journal"). Regular chat is faster for conversational turns.
@@ -222,6 +222,19 @@ The **Files** button opens an editor for your persona's identity and memory file
 Toggle **preview** / **edit** to switch between rendered markdown and raw text. **Ctrl+S** saves, **Esc** closes.
 The **Audit Log** group at the bottom of the sidebar (collapsed by default) lists tool call logs by date (`YYYY-MM-DD.jsonl`). Click any date to view a read-only table of every orchestrator tool call: time, tool name, status, model, args, and result snippet. Status is colour-coded: green = ok, red = error, amber = denied.
 ---
 ## Push Notifications
 Cortex can send browser push notifications — even when the tab is closed.
 - Open **☰ → Enable notifications** and accept the browser permission prompt.
 - Once enabled, the button shows **Notifications on** (in accent colour).
 - Click again to disable. Subscriptions are stored per-device.
 - The orchestrator's `web_push` tool lets Inara send you a push proactively (e.g. when a long task completes).
 ---
 ## Context & Memory ( ⚙ panel )
@@ -305,6 +318,13 @@ For direct access or scripting:
 | `GET` | `/orchestrate/{job_id}` | Poll job status and result |
 | `GET` | `/settings/models` | Model registry UI |
 | `POST` | `/api/models/role` | Set a role assignment (JSON body) |
 | `GET` | `/api/push/vapid-key` | VAPID public key (for push subscription) |
 | `POST` | `/api/push/subscribe` | Register a push subscription |
 | `DELETE` | `/api/push/subscribe` | Remove a push subscription |
 | `GET` | `/api/audit/files` | List available audit log dates (own data) |
 | `GET` | `/api/audit/day?date=` | Tool call entries for a specific date (own data) |
 | `GET` | `/api/audit/recent` | Recent tool calls across days (admin) |
 | `GET` | `/api/audit/stats` | Tool call counts by tool/status/user (admin) |
 | `GET` | `/health` | Health check — returns `{"status": "ok"}` |
 Chat request body (`POST /chat`):
--- a/cortex/static/TOOLS.md
+++ b/cortex/static/TOOLS.md
@@ -1,6 +1,6 @@
 # Tool Reference
-> This reference covers all 39 orchestrator tools available when the ⚡ toggle is on.
+> This reference covers all 40 orchestrator tools available when the ⚡ toggle is on.
 > Tools are invoked automatically by the orchestrator — you don't call them directly.
 ¹ **Admin only** — requires the `admin` role. Invisible to regular users.  
@@ -79,6 +79,7 @@
 | Tool | What it does |
 |---|---|
 | `web_push` | Send a browser push notification to the active user's registered devices |
 | `email_send` ¹ | Send an email via SMTP; recipient must match your `email_allowlist.json` |
 | `nc_talk_send` ¹ | Send a message to a Nextcloud Talk conversation |
--- a/cortex/static/app.js
+++ b/cortex/static/app.js
@@ -1547,6 +1547,7 @@
                <th class="at-time">Time</th>
                <th class="at-tool">Tool</th>
                <th class="at-status">Status</th>
                <th class="at-model">Model</th>
                <th class="at-args">Args</th>
                <th class="at-result">Result</th>
            </tr></thead>`;
@@ -1554,11 +1555,13 @@
            const tbody = document.createElement('tbody');
            for (const e of entries) {
                const time = (e.ts || '').slice(11, 19); // HH:MM:SS
                const model = e.model || e.engine || '';
                const tr = document.createElement('tr');
                tr.innerHTML = `
                    <td class="at-time">${time}</td>
                    <td class="at-tool" title="${e.tool || ''}">${e.tool || '?'}</td>
                    <td class="${_auditStatusClass(e.status)}">${e.status || '?'}</td>
                    <td class="at-model" title="${model}">${model}</td>
                    <td class="at-args" title="${(_fmtArgs(e.args) || '').replace(/"/g, '&quot;')}">${_fmtArgs(e.args)}</td>
                    <td class="at-result" title="${(e.result_snippet || '').replace(/</g, '&lt;').replace(/"/g, '&quot;')}">${
                        (e.result_snippet || '').replace(/</g, '&lt;').slice(0, 80)
--- a/cortex/static/style.css
+++ b/cortex/static/style.css
@@ -1261,7 +1261,8 @@
        .at-time   { width: 7em;  color: var(--muted); white-space: nowrap; }
        .at-tool   { width: 11em; color: var(--accent); font-weight: 500; }
        .at-status { width: 4.5em; font-weight: 600; }
-        .at-args   { width: 30%; color: var(--muted); }
+        .at-model  { width: 10em; color: var(--muted); }
        .at-args   { width: 25%; color: var(--muted); }
        .at-result { color: var(--muted); }
        .at-status.ok     { color: #4ade80; }
        .at-status.error  { color: #f87171; }
--- a/cortex/tool_audit.py
+++ b/cortex/tool_audit.py
@@ -16,6 +16,7 @@ Each line is a JSON object:
 import asyncio
 import json
 import logging
 from contextvars import ContextVar
 from datetime import datetime, date
 from pathlib import Path
@@ -29,6 +30,16 @@ _SNIPPET_MAX = 300   # chars of result to keep as snippet
 # Per-file write locks — prevents interleaved lines under concurrent tool calls
 _locks: dict[str, asyncio.Lock] = {}
 # Engine/model tags for audit entries; orchestrators set them before their tool loop runs
 _audit_engine: ContextVar[str] = ContextVar("_audit_engine", default="")
 _audit_model: ContextVar[str] = ContextVar("_audit_model", default="")
 def set_context(engine: str, model: str) -> None:
    """
    Tag subsequent tool calls with the orchestrator engine and model.
    Intended to be called at the start of each orchestrator run; the values
    are stored in the module's ContextVars.
    """
    for var, value in ((_audit_engine, engine), (_audit_model, model)):
        var.set(value)
 def _truncate_args(args: dict) -> dict:
    out = {}
@@ -63,6 +74,8 @@ async def record(
    entry = {
        "ts":             datetime.now().isoformat(timespec="seconds"),
        "user":           user,
        "engine":         _audit_engine.get(),
        "model":          _audit_model.get(),
        "tool":           tool,
        "args":           _truncate_args(args),
        "status":         status,
--- a/documentation/TODO__Agents.md
+++ b/documentation/TODO__Agents.md
@@ -39,8 +39,7 @@ New tools for `cortex/tools/` — higher-value additions that fill obvious gaps.
 - [x] **`file_write`** — overwrite/append to home_root paths, admin-only, confirm-required — 2026-04-29
 - [x] **`nc_talk_send`** — outbound NC Talk message via notification.py, admin-only — 2026-04-29
 - [x] **`email_send`** — SMTP via email_utils, per-user regex allowlist in `home/{user}/email_allowlist.json`, managed via Settings UI textarea + Files panel raw editor — 2026-04-29
- [ ] **`web_push`** — send a browser push notification (requires push subscription stored
+- [x] **`web_push`** — VAPID push via pywebpush; subscriptions in `home/{user}/push_subscriptions.json`; "Enable notifications" toggle in ☰ menu; sw.js push+notificationclick handlers — 2026-05-05
      per-user; pairs well with the PWA service worker already in place)
 ### [Channel] Proactive notifications
 Inara reaches out on her own initiative via NC Talk or Google Chat when a reminder
@@ -121,6 +120,15 @@ so Scott can see who's spending what.
 - [ ] Expose via `/api/usage` endpoint; add a summary row to the Settings page
 - [ ] Optional: soft spending limit with a warning toast when exceeded
 ### [Security] Tool call audit log — 2026-05-05
 Every orchestrator tool invocation logged to `home/{user}/tool_audit/YYYY-MM-DD.jsonl`.
 - [x] `tool_audit.py` — JSONL writer with asyncio locks; ContextVars for engine/model set by each orchestrator at run start
 - [x] Hook in `call_tool()` — fire-and-forget `asyncio.create_task`; captures status ok/error/denied, 300-char result snippet, args (truncated at 500 chars)
 - [x] `GET /api/audit/files` — lists available dates for current user (self-service)
 - [x] `GET /api/audit/day?date=` — returns entries for one date (self-service)
 - [x] `GET /api/audit/recent` + `/stats` — cross-user aggregation (admin only)
 - [x] "Audit Log" group in Files panel sidebar (collapsed by default) — read-only table with time/tool/status/model/args/result columns; colour-coded status
 ### [Intelligence] Dev agent pipeline
 See `ARCH__Intelligence_Layer.md`. Full design not yet started.
 - [ ] Specialist agent: frontend (SvelteKit) code changes