feat: model registry Phase 3 — slot-based backend toggle
Backend toggle now cycles through chat role models by label instead of
cycling service type strings (auto/claude/gemini/local).
- model_registry: get_model_for_slot() — resolves a specific priority
slot without walking the fallback chain
- llm_client: complete() gains slot param; explicit slot selection
dispatches directly to that model with no silent fallback
- routers/chat.py: ChatRequest.slot; GET /backend returns chat_models
[{slot, label, type}] for the UI; _stream_chat uses resolved model
label for the response tag when a slot is pinned
- app.js: toggle loads chat_models from /backend, cycles by label,
sends slot in chat payload; legacy model field removed from payload
- app.js: fix Gap B — agent mode placeholder no longer says "Gemini
tool loop"; now says "orchestrator"
- DESIGN doc: updated to reflect Phases 1+2 being complete, the
catalog-as-code decision, Gaps A/B, and Phase 3 implementation details
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -254,8 +254,8 @@
|
||||
: 'Private note — only you see this…';
|
||||
} else if (current_mode === 'agent') {
|
||||
inputEl.placeholder = ctrlEnterMode
|
||||
? `Task for ${personaLabel}… (Gemini tool loop — Ctrl+Enter to run)`
|
||||
: `Task for ${personaLabel}… (Gemini tool loop)`;
|
||||
? `Task for ${personaLabel}… (orchestrator — Ctrl+Enter to run)`
|
||||
: `Task for ${personaLabel}… (orchestrator)`;
|
||||
} else if (current_mode === 'otr') {
|
||||
inputEl.placeholder = 'Off the record — not logged or distilled…';
|
||||
} else {
|
||||
@@ -340,58 +340,48 @@
|
||||
}
|
||||
|
||||
// ── Backend toggle ───────────────────────────────────────────
|
||||
// null = "auto" — uses role-based routing from model registry
|
||||
// 'claude' / 'gemini' / 'local' = explicit override
|
||||
// Phase 3: cycles through the chat role's configured models by label.
|
||||
// Sends slot ("primary"|"backup_1"|"backup_2") in chat requests.
|
||||
// Falls back to legacy "auto" behavior when no models are configured.
|
||||
|
||||
// On load only fetch local_model hint; don't override primaryBackend default (null)
|
||||
fetch('/backend').then(r => r.json()).then(d => {
|
||||
if (backendModelHint && d.local_model) {
|
||||
// Pre-fill hint in case user is already in local mode
|
||||
backendModelHint.textContent = d.local_model.label || d.local_model.model_name;
|
||||
}
|
||||
});
|
||||
|
||||
const BACKEND_CYCLE = [null, 'claude', 'gemini', 'local'];
|
||||
const BACKEND_CLASS = { claude: '', gemini: 'mem-on', local: 'local-on' };
|
||||
const TYPE_CLASS = { claude_cli: '', gemini_api: 'mem-on', gemini_cli: 'mem-on', local_openai: 'local-on' };
|
||||
const backendModelHint = document.getElementById('backend-model-hint');
|
||||
|
||||
function setBackendUI(backend, localModel) {
|
||||
primaryBackend = backend;
|
||||
backendToggle.textContent = backend === null ? 'auto' : backend;
|
||||
const extra = backend === null ? '' : (BACKEND_CLASS[backend] || '');
|
||||
backendToggle.className = 'ctx-btn' + (extra ? ' ' + extra : '');
|
||||
let chatSlots = []; // [{slot, label, type}] from /backend
|
||||
let slotIdx = 0; // index into chatSlots; ignored while chatSlots is empty (auto mode — activeSlot() returns null)
|
||||
|
||||
function activeSlot() {
|
||||
return chatSlots.length > 0 ? chatSlots[slotIdx] : null;
|
||||
}
|
||||
|
||||
function setToggleUI(entry) {
|
||||
if (!entry) {
|
||||
backendToggle.textContent = 'auto';
|
||||
backendToggle.className = 'ctx-btn';
|
||||
primaryBackend = null;
|
||||
} else {
|
||||
backendToggle.textContent = entry.label;
|
||||
backendToggle.className = 'ctx-btn ' + (TYPE_CLASS[entry.type] || '');
|
||||
primaryBackend = entry.slot; // kept for legacy compat; the chat payload now sends `slot` (via activeSlot()) instead of `model`
|
||||
}
|
||||
if (backendModelHint) {
|
||||
if (backend === 'local' && localModel) {
|
||||
backendModelHint.textContent = localModel.label || localModel.model_name;
|
||||
backendModelHint.style.display = '';
|
||||
} else {
|
||||
backendModelHint.textContent = '';
|
||||
backendModelHint.style.display = 'none';
|
||||
}
|
||||
backendModelHint.textContent = '';
|
||||
backendModelHint.style.display = 'none';
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize to auto mode
|
||||
setBackendUI(null, null);
|
||||
fetch('/backend').then(r => r.json()).then(d => {
|
||||
chatSlots = d.chat_models || [];
|
||||
slotIdx = 0;
|
||||
setToggleUI(chatSlots[0] || null);
|
||||
});
|
||||
|
||||
backendToggle.addEventListener('click', async () => {
|
||||
const idx = BACKEND_CYCLE.indexOf(primaryBackend);
|
||||
const next = BACKEND_CYCLE[(idx + 1) % BACKEND_CYCLE.length];
|
||||
if (next === null) {
|
||||
// Auto: role-based routing — no server call needed
|
||||
setBackendUI(null, null);
|
||||
addMessage('system', 'Backend: auto (role-based routing)');
|
||||
} else {
|
||||
const res = await fetch('/backend', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ primary: next }),
|
||||
});
|
||||
const d = await res.json();
|
||||
setBackendUI(next, d.local_model);
|
||||
addMessage('system', `Backend: ${next} (fallback: ${d.fallback})`);
|
||||
}
|
||||
backendToggle.addEventListener('click', () => {
|
||||
if (chatSlots.length === 0) return;
|
||||
slotIdx = (slotIdx + 1) % chatSlots.length;
|
||||
const entry = chatSlots[slotIdx];
|
||||
setToggleUI(entry);
|
||||
addMessage('system', `Backend: ${entry.label}`);
|
||||
});
|
||||
|
||||
// ── Sessions panel ───────────────────────────────────────────
|
||||
@@ -1066,7 +1056,7 @@
|
||||
include_mid: memMid,
|
||||
include_short: memShort,
|
||||
off_record: current_mode === 'otr',
|
||||
model: primaryBackend,
|
||||
slot: activeSlot()?.slot || null,
|
||||
user: CORTEX_USER,
|
||||
persona: CORTEX_PERSONA,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user