feat: model registry Phase 3 — slot-based backend toggle

Backend toggle now cycles through chat role models by label instead of
cycling service type strings (auto/claude/gemini/local).

- model_registry: get_model_for_slot() — resolves a specific priority
  slot without walking the fallback chain
- llm_client: complete() gains slot param; explicit slot selection
  dispatches directly to that model with no silent fallback
- routers/chat.py: ChatRequest.slot; GET /backend returns chat_models
  [{slot, label, type}] for the UI; _stream_chat uses resolved model
  label for the response tag when a slot is pinned
- app.js: toggle loads chat_models from /backend, cycles by label,
  sends slot in chat payload; legacy model field removed from payload
- app.js: fix Gap B — agent mode placeholder no longer says "Gemini
  tool loop"; now says "orchestrator"
- DESIGN doc: updated to reflect phases 1+2 complete, catalog-as-code
  decision, Gap A/B documented, Phase 3 implementation details

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-04-27 21:43:08 -04:00
parent 3bc6b45f9f
commit 962d58d2e2
5 changed files with 248 additions and 296 deletions

View File

@@ -254,8 +254,8 @@
: 'Private note — only you see this…';
} else if (current_mode === 'agent') {
inputEl.placeholder = ctrlEnterMode
? `Task for ${personaLabel}… (Gemini tool loop — Ctrl+Enter to run)`
: `Task for ${personaLabel}… (Gemini tool loop)`;
? `Task for ${personaLabel}… (orchestrator — Ctrl+Enter to run)`
: `Task for ${personaLabel}… (orchestrator)`;
} else if (current_mode === 'otr') {
inputEl.placeholder = 'Off the record — not logged or distilled…';
} else {
@@ -340,58 +340,48 @@
}
// ── Backend toggle ───────────────────────────────────────────
// null = "auto" — uses role-based routing from model registry
// 'claude' / 'gemini' / 'local' = explicit override
// Phase 3: cycles through the chat role's configured models by label.
// Sends slot ("primary"|"backup_1"|"backup_2") in chat requests.
// Falls back to legacy "auto" behavior when no models are configured.
// On load only fetch local_model hint; don't override primaryBackend default (null)
fetch('/backend').then(r => r.json()).then(d => {
if (backendModelHint && d.local_model) {
// Pre-fill hint in case user is already in local mode
backendModelHint.textContent = d.local_model.label || d.local_model.model_name;
}
});
const BACKEND_CYCLE = [null, 'claude', 'gemini', 'local'];
const BACKEND_CLASS = { claude: '', gemini: 'mem-on', local: 'local-on' };
const TYPE_CLASS = { claude_cli: '', gemini_api: 'mem-on', gemini_cli: 'mem-on', local_openai: 'local-on' };
const backendModelHint = document.getElementById('backend-model-hint');
function setBackendUI(backend, localModel) {
primaryBackend = backend;
backendToggle.textContent = backend === null ? 'auto' : backend;
const extra = backend === null ? '' : (BACKEND_CLASS[backend] || '');
backendToggle.className = 'ctx-btn' + (extra ? ' ' + extra : '');
let chatSlots = []; // [{slot, label, type}] from /backend
let slotIdx = 0; // index into chatSlots; -1 = auto (no registry models)
function activeSlot() {
return chatSlots.length > 0 ? chatSlots[slotIdx] : null;
}
function setToggleUI(entry) {
if (!entry) {
backendToggle.textContent = 'auto';
backendToggle.className = 'ctx-btn';
primaryBackend = null;
} else {
backendToggle.textContent = entry.label;
backendToggle.className = 'ctx-btn ' + (TYPE_CLASS[entry.type] || '');
primaryBackend = entry.slot; // used as legacy compat in payload
}
if (backendModelHint) {
if (backend === 'local' && localModel) {
backendModelHint.textContent = localModel.label || localModel.model_name;
backendModelHint.style.display = '';
} else {
backendModelHint.textContent = '';
backendModelHint.style.display = 'none';
}
backendModelHint.textContent = '';
backendModelHint.style.display = 'none';
}
}
// Initialize to auto mode
setBackendUI(null, null);
fetch('/backend').then(r => r.json()).then(d => {
chatSlots = d.chat_models || [];
slotIdx = 0;
setToggleUI(chatSlots[0] || null);
});
backendToggle.addEventListener('click', async () => {
const idx = BACKEND_CYCLE.indexOf(primaryBackend);
const next = BACKEND_CYCLE[(idx + 1) % BACKEND_CYCLE.length];
if (next === null) {
// Auto: role-based routing — no server call needed
setBackendUI(null, null);
addMessage('system', 'Backend: auto (role-based routing)');
} else {
const res = await fetch('/backend', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ primary: next }),
});
const d = await res.json();
setBackendUI(next, d.local_model);
addMessage('system', `Backend: ${next} (fallback: ${d.fallback})`);
}
backendToggle.addEventListener('click', () => {
if (chatSlots.length === 0) return;
slotIdx = (slotIdx + 1) % chatSlots.length;
const entry = chatSlots[slotIdx];
setToggleUI(entry);
addMessage('system', `Backend: ${entry.label}`);
});
// ── Sessions panel ───────────────────────────────────────────
@@ -1066,7 +1056,7 @@
include_mid: memMid,
include_short: memShort,
off_record: current_mode === 'otr',
model: primaryBackend,
slot: activeSlot()?.slot || null,
user: CORTEX_USER,
persona: CORTEX_PERSONA,
};