From 3716e5974f68bdfbbde403e4bb297712e38dad5a Mon Sep 17 00:00:00 2001
From: Scott Idem <stidem@gmail.com>
Date: Tue, 12 May 2026 21:32:43 -0400
Subject: [PATCH] =?UTF-8?q?feat:=20Phase=203=20model=20toggle=20=E2=80=94?=
 =?UTF-8?q?=20cycle=20chat-role=20slot=20models=20in=20UI?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the role-cycle toggle with a slot model toggle in the Context &
Memory panel. The active model label is shown on the button; clicking cycles
through Primary → Backup 1 → Backup 2 slots configured for the Chat role.

- app.js: remove activeRole()/availableRoles role-cycling; add
  activeChatModel()/chatModels slot cycling; update send/orchestrate
  payloads to send slot + chat_role:"chat"; fix updateSendBtnTitle and
  startRunTimer to use activeChatModel()
- chat.py: add slot field to ChatRequest; pass slot= to complete();
  resolve backend_label from slot config; add _chat_slot_models() helper;
  include chat_models in GET /backend response
- HELP.md: update Model toggle description, tool/category counts
  (62 tools, 16 categories), Backends section, API chat payload example

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 cortex/routers/chat.py | 31 ++++++++++++++++--
 cortex/static/HELP.md  | 39 +++++++++++-----------
 cortex/static/app.js   | 73 ++++++++++++++++++++----------------------
 3 files changed, 83 insertions(+), 60 deletions(-)

diff --git a/cortex/routers/chat.py b/cortex/routers/chat.py
index da8e920..85ed050 100644
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -47,6 +47,7 @@ class ChatRequest(BaseModel):
     session_id: str | None = None
     tier: int | None = None
     model: str | None = None        # legacy backend override ("claude"|"gemini"|"local")
+    slot: str | None = None         # Phase 3: explicit slot ("primary"|"backup_1"|"backup_2")
     chat_role: str = "chat"         # active role: "chat"|"coder"|"research"|"distill" etc.
     include_long: bool = True
     include_mid: bool = True
@@ -109,6 +110,7 @@ async def _stream_chat(req: ChatRequest):
         messages=history,
         model=req.model,
         role=req.chat_role,
+        slot=req.slot,
     ))
 
     try:
@@ -124,7 +126,11 @@ async def _stream_chat(req: ChatRequest):
 
         try:
             response_text, actual_backend = task.result()
-            backend_label = _role_model_label(user, req.chat_role, actual_backend)
+            if req.slot:
+                slot_cfg = model_registry.get_model_for_slot(user, req.chat_role, req.slot)
+                backend_label = (slot_cfg or {}).get("label") or _role_model_label(user, req.chat_role, actual_backend)
+            else:
+                backend_label = _role_model_label(user, req.chat_role, actual_backend)
             host = platform.node()
             history.append({
                 "role": "assistant",
@@ -203,6 +209,25 @@ def _local_model_info(request: Request) -> dict | None:
     return None
 
 
+def _chat_slot_models(username: str) -> list[dict]:
+    """Return [{slot, label, type}] per resolvable chat-role slot, in PRIORITY_KEYS order (primary first)."""
+    registry = model_registry.get_registry(username)
+    role_slots = registry.get("roles", {}).get("chat", {})  # slot key -> model id for the "chat" role
+    result = []
+    for slot_key in model_registry.PRIORITY_KEYS:
+        model_id = role_slots.get(slot_key)
+        if not model_id:
+            continue  # nothing assigned to this slot
+        resolved = model_registry._resolve_model(registry, model_id)
+        if resolved:  # falsy result is skipped; presumably an id missing from the registry — confirm
+            result.append({
+                "slot":  slot_key,
+                "label": resolved.get("label") or resolved.get("model_name") or "",  # label, else model_name, else ""
+                "type":  resolved.get("type", ""),
+            })
+    return result
+
+
 def _available_roles_for_toggle(username: str) -> list[dict]:
     """Return roles with a primary model assigned (excluding orchestrator) for the UI toggle.
 
@@ -231,6 +256,7 @@ def _available_roles_for_toggle(username: str) -> list[dict]:
 @router.get("/backend")
 async def get_backend(request: Request) -> dict:
     username        = _request_user(request)
+    chat_models     = _chat_slot_models(username) if username else []
     available_roles = _available_roles_for_toggle(username) if username else []
     p = settings.primary_backend
 
@@ -241,7 +267,8 @@ async def get_backend(request: Request) -> dict:
             orch_label = orch_cfg.get("label") or orch_cfg.get("model_name") or None
 
     return {
-        "available_roles":   available_roles,
+        "chat_models":        chat_models,       # Phase 3: [{slot, label, type}] for chat-role slots
+        "available_roles":    available_roles,    # kept for banner + backward compat
         "orchestrator_model": orch_label,
         # Legacy fields kept for backward compat
         "primary":     p,
diff --git a/cortex/static/HELP.md b/cortex/static/HELP.md
index 2e667c5..c07eea0 100644
--- a/cortex/static/HELP.md
+++ b/cortex/static/HELP.md
@@ -6,7 +6,7 @@
      and are appended automatically by help.html when present.
 -->
 
-*Last updated: 2026-05-09*
+*Last updated: 2026-05-12*
 
 ---
 
@@ -43,7 +43,7 @@ The **Context & Memory** panel (sliders icon with tier number) contains all conf
 | **Context Tier** | T1 – T4 context depth |
 | **Memory Layers** | Toggle Long / Mid / Short memory on/off |
 | **Distill Memory** | Manually trigger Short / Mid / Long / All distillation |
-| **Role** | Active LLM role — click to cycle through configured role assignments |
+| **Model** | Active chat model — click to cycle through your configured slot models (Primary → Backup 1 → …) |
 | **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L** cycles input area height · **⌃↵** toggles send shortcut |
 
 All settings persist in `localStorage` across page refreshes.
@@ -82,12 +82,14 @@ Orchestrated sessions persist to history exactly like regular chat.
 
 ### Available Tools
 
-50 tools across 12 categories. Each tool schema is sent to the model on every orchestrated call — fewer active tools means fewer tokens per call.
+62 tools across 16 categories. Each tool schema is sent to the model on every orchestrated call — fewer active tools means fewer tokens per call.
 
 | Category | Tools |
 |---|---|
 | **Web** | `web_search`, `http_fetch`, `web_read`, `http_post` |
-| **Files** | `file_read`, `file_list`, `file_write`, `session_read`, `session_search` |
+| **Project Files** | `project_file_read`, `project_file_list`, `file_stat`, `file_grep`, `file_diff`, `file_syntax_check` |
+| **Files** (admin) | `file_read`, `file_list`, `file_write`, `session_read`, `session_search` |
+| **Git** | `git_status`, `git_log`, `git_diff` |
 | **Shell** | `shell_exec`, `claude_allow_dir` |
 | **System** | `cortex_restart`, `cortex_logs`, `cortex_status`, `cortex_update` |
 | **Tasks** | `task_list`, `task_create`, `task_update`, `task_complete` |
@@ -96,10 +98,12 @@ Orchestrated sessions persist to history exactly like regular chat.
 | **Scratchpad** | `scratch_read`, `scratch_write`, `scratch_append`, `scratch_clear` |
 | **Notifications** | `web_push`, `email_send`, `nc_talk_send`, `nc_talk_history` |
 | **Aether Journals** | `ae_journal_list/search`, `ae_journal_entries_list`, `ae_journal_entry_read/create/update/disable/append/prepend` |
+| **Aether Tasks** | `ae_task_list` |
 | **Agent Notes** | `agent_notes_read`, `agent_notes_write`, `agent_notes_append`, `agent_notes_clear` |
 | **Agents** | `spawn_agent` |
+| **Home Assistant** | `ha_get_state`, `ha_get_states`, `ha_call_service` |
 
-File, Shell, System, Agents, and some Notification/Web tools are **admin-only** and not visible to regular users.
+Files, Shell, System, Agents, and some Notification/Web tools are **admin-only** and not visible to regular users.
 `http_post` requires a URL prefix allowlist in `home/{user}/http_allowlist.json`.
 `nc_talk_history` requires `nc_username` and `nc_app_password` in `channels.json` under `nextcloud`.
 
@@ -149,21 +153,14 @@ Once installed, opening Cortex from the home screen or app launcher skips the br
 
 ## Backends
 
-Three backends are available:
+The **Model** toggle in the Context & Memory panel cycles through the slot models configured for your Chat role (Primary → Backup 1 → Backup 2 → …). Click it to switch between models mid-session.
 
-| Backend | What it is |
-|---|---|
-| **Claude** | Anthropic Claude via the Claude CLI (OAuth — no API key needed) |
-| **Gemini** | Google Gemini via the Gemini CLI |
-| **Local** | Any OpenAI-compatible endpoint (Open WebUI, Ollama, OpenRouter, etc.) |
+- The button label shows the active model (e.g. "GPT-4o", "Gemini 2.5 Flash")
+- The selected slot is sent with each chat request so the correct model is used
+- If fewer than two models are configured, clicking the toggle does nothing
+- A system message appears in the chat when you switch models
 
-The **Role** toggle in the Context & Memory panel cycles through configured role assignments. Each role maps to a Primary / Backup 1 / Backup 2 model chain set in the Model Registry.
-
-- The active model label appears below the toggle button
-- `auto` (default) uses the model assigned to the `chat` role in your Model Registry
-- Forcing a specific backend overrides the role assignment for that session
-
-If the active backend fails, a fallback is tried automatically. A **⚡** badge appears on the response when this happens.
+If the active model fails, the next configured backup slot is tried automatically.
 
 Each response shows a **model tag** (bottom-right of message) with the model label and host, so you always know what responded.
 
@@ -447,10 +444,12 @@ Chat request body (`POST /chat`):
   "message": "string",
   "session_id": "string | null",
   "tier": 2,
-  "model": "claude | gemini | local | null",
+  "chat_role": "chat",
+  "slot": "primary | backup_1 | backup_2 | null",
   "include_long": true,
   "include_mid": true,
-  "include_short": true
+  "include_short": true,
+  "off_record": false
 }
 ```
 
diff --git a/cortex/static/app.js b/cortex/static/app.js
index 0abceed..5b28249 100644
--- a/cortex/static/app.js
+++ b/cortex/static/app.js
@@ -313,8 +313,8 @@
         });
 
         // ── Tools toggle ─────────────────────────────────────────────
-        // When on: submit goes to POST /orchestrate (Gemini tool loop → active role responds).
-        // When off: submit goes to POST /chat (direct to active role, no tools).
+        // When on: submit goes to POST /orchestrate (orchestrator tool loop → active model responds).
+        // When off: submit goes to POST /chat (direct to active model, no tools).
         let toolsEnabled = localStorage.getItem('tools-enabled') === 'true';
         let _runStart = 0;
         let _runTimer = null;
@@ -335,9 +335,8 @@
         });
 
         function updateSendBtnTitle() {
-            const role    = activeRole();
-            const rmodel  = role?.model_label || '(server default)';
-            const rname   = role?.label || 'Chat';
+            const entry   = activeChatModel();
+            const rmodel  = entry?.label || '(server default)';
             const mode    = current_mode === 'otr' ? 'Off The Record'
                           : current_mode === 'note' ? 'Note'
                           : 'Chat';
@@ -347,13 +346,13 @@
             if (useOrch) {
                 const omodel = orchestratorModel || '(server default)';
                 lines = [
-                    `Role: ${rname}  ·  ${rmodel}`,
+                    `Model: ${rmodel}`,
                     `Orchestrator: ${omodel}  (tool loop)`,
                     `Mode: ${mode}`,
                 ];
             } else {
                 lines = [
-                    `Role: ${rname}  ·  ${rmodel}`,
+                    `Model: ${rmodel}`,
                     `Mode: ${mode}`,
                     `Engine: Direct (no tool loop)`,
                 ];
@@ -364,14 +363,13 @@
         function startRunTimer() {
             _runStart = Date.now();
             function tick() {
-                const secs  = Math.floor((Date.now() - _runStart) / 1000);
-                const role  = activeRole();
-                const rname = role?.label || 'Chat';
+                const secs    = Math.floor((Date.now() - _runStart) / 1000);
+                const entry   = activeChatModel();
                 const useOrch = toolsEnabled && current_mode !== 'note';
-                const model = useOrch
+                const model   = useOrch
                     ? (orchestratorModel || '(server default)') + '  (tool loop)'
-                    : (role?.model_label || '(server default)');
-                stopBtn.title = `Running: ${rname} · ${model}\nElapsed: ${secs}s — click to cancel`;
+                    : (entry?.label || '(server default)');
+                stopBtn.title = `Running: Chat · ${model}\nElapsed: ${secs}s — click to cancel`;
             }
             tick();
             _runTimer = setInterval(tick, 1000);
@@ -469,23 +467,24 @@
             document.addEventListener('click', () => personaDropEl.classList.remove('open'));
         }
 
-        // ── Role toggle ──────────────────────────────────────────────
-        // Cycles through roles that have a primary model assigned (excluding orchestrator).
-        // Sends chat_role ("chat"|"coder"|"research"|...) in chat requests.
-        // Falls back to "chat" when no roles are configured in the registry.
+        // ── Model toggle (Phase 3) ───────────────────────────────────
+        // Cycles through the chat role's configured slot models (primary → backup_1 → …).
+        // Shows the model label on the button; sends slot + chat_role:"chat" in requests.
+        // With no models configured the button reads "chat" and requests send slot: null.
 
         const TYPE_CLASS = { claude_cli: '', gemini_api: 'mem-on', gemini_cli: 'mem-on', local_openai: 'local-on' };
         const backendModelHint = document.getElementById('backend-model-hint');
 
-        let availableRoles  = [];  // [{role, label, model_label, type}] from /backend
-        let roleIdx         = 0;
-        let orchestratorModel = null;  // label of the orchestrator-role model
+        let chatModels      = [];  // [{slot, label, type}] for chat-role slots
+        let availableRoles  = [];  // [{role, label, model_label, type}] — kept for banner check
+        let modelIdx        = 0;
+        let orchestratorModel = null;
 
-        function activeRole() {
-            return availableRoles.length > 0 ? availableRoles[roleIdx] : null;
+        function activeChatModel() {
+            return chatModels.length > 0 ? chatModels[modelIdx] : null;
         }
 
-        function setRoleToggleUI(entry) {
+        function setModelToggleUI(entry) {
             if (!entry) {
                 backendToggle.textContent = 'chat';
                 backendToggle.className   = 'ctx-btn';
@@ -493,19 +492,16 @@
                 backendToggle.textContent = entry.label;
                 backendToggle.className   = 'ctx-btn ' + (TYPE_CLASS[entry.type] || '');
             }
-            if (backendModelHint) {
-                const hint = entry?.model_label || '';
-                backendModelHint.textContent   = hint;
-                backendModelHint.style.display = hint ? '' : 'none';
-            }
+            if (backendModelHint) backendModelHint.style.display = 'none';
             updateSendBtnTitle();
         }
 
         fetch('/backend').then(r => r.json()).then(d => {
+            chatModels        = d.chat_models     || [];
             availableRoles    = d.available_roles || [];
             orchestratorModel = d.orchestrator_model || null;
-            roleIdx           = 0;
-            setRoleToggleUI(availableRoles[0] || null);
+            modelIdx          = 0;
+            setModelToggleUI(chatModels[0] || null);
             _maybeShowNoBanner(availableRoles);
         });
 
@@ -527,17 +523,16 @@
                         style="background:none;border:none;color:#78350f;cursor:pointer;font-size:1rem;line-height:1;padding:0 0.2rem;"
                         title="Dismiss">✕</button>
             `;
-            // Insert at the top of #chat-col (or body if not found)
             const col = document.getElementById('chat-col') || document.body.firstElementChild;
             col.insertBefore(banner, col.firstChild);
         }
 
         backendToggle.addEventListener('click', () => {
-            if (availableRoles.length <= 1) return;
-            roleIdx = (roleIdx + 1) % availableRoles.length;
-            const entry = availableRoles[roleIdx];
-            setRoleToggleUI(entry);
-            addMessage('system', `Role: ${entry.label} · ${entry.model_label}`);
+            if (chatModels.length <= 1) return;
+            modelIdx = (modelIdx + 1) % chatModels.length;
+            const entry = chatModels[modelIdx];
+            setModelToggleUI(entry);
+            addMessage('system', `Model: ${entry.label}`);
         });
 
         // ── Sessions panel ───────────────────────────────────────────
@@ -1346,7 +1341,8 @@
                 include_mid: memMid,
                 include_short: memShort,
                 off_record: isOtr,
-                chat_role: activeRole()?.role || 'chat',
+                chat_role: 'chat',
+                slot: activeChatModel()?.slot || null,
                 user: CORTEX_USER,
                 persona: CORTEX_PERSONA,
             };
@@ -1377,7 +1373,8 @@
                         include_mid: memMid,
                         include_short: memShort,
                         off_record: current_mode === 'otr',
-                        chat_role: activeRole()?.role || 'chat',
+                        chat_role: 'chat',
+                        slot: activeChatModel()?.slot || null,
                         user: CORTEX_USER,
                         persona: CORTEX_PERSONA,
                     }),