feat: model registry Phase 3 — slot-based backend toggle

Backend toggle now cycles through chat role models by label instead of
cycling service type strings (auto/claude/gemini/local).

- model_registry: get_model_for_slot() — resolves a specific priority
  slot without walking the fallback chain
- llm_client: complete() gains slot param; explicit slot selection
  dispatches directly to that model and surfaces its errors instead of
  falling back (an unconfigured slot still falls through to auto routing)
- routers/chat.py: ChatRequest.slot; GET /backend returns chat_models
  [{slot, label, type}] for the UI; _stream_chat uses resolved model
  label for the response tag when a slot is pinned
- app.js: toggle loads chat_models from /backend, cycles by label,
  sends slot in chat payload; legacy model field removed from payload
- app.js: fix Gap B — agent mode placeholder no longer says "Gemini
  tool loop"; now says "orchestrator"
- DESIGN doc: updated to reflect phases 1+2 complete, catalog-as-code
  decision, Gap A/B documented, Phase 3 implementation details

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-04-27 21:43:08 -04:00
parent 3bc6b45f9f
commit 962d58d2e2
5 changed files with 248 additions and 296 deletions

View File

@@ -49,14 +49,17 @@ async def complete(
messages: list[dict],
model: str | None = None,
role: str = "chat",
slot: str | None = None,
max_tokens: int = 2048,
) -> tuple[str, str]:
"""
Returns (response_text, actual_backend_used).
model: explicit backend override ("claude" | "gemini" | "local") from UI toggle.
slot: Phase 3 — specific role slot ("primary" | "backup_1" | "backup_2").
Resolves that exact slot, no fallback chain. Takes priority over model.
model: legacy backend override ("claude" | "gemini" | "local") from old toggle.
None = resolve via model registry for the given role.
role: registry role used when model is None (default: "chat").
role: registry role used for slot/auto routing (default: "chat").
"""
import model_registry as _reg
from persona import _user
@@ -64,21 +67,31 @@ async def complete(
username = _user.get()
resolved_cfg: dict | None = None
if model in _EXPLICIT_BACKENDS:
# User explicitly selected a backend in the UI
if model == "local":
resolved_cfg = _reg.get_best_local_model(username, role)
if not resolved_cfg:
raise RuntimeError("No local model configured — add one at /settings/models")
primary = model
else:
# Role-based routing via model registry
resolved = _reg.get_model_for_role(username, role)
if resolved:
resolved_cfg = resolved
primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude")
if slot is not None:
# Phase 3: explicit slot selection — no fallback within the role
resolved_cfg = _reg.get_model_for_slot(username, role, slot)
if resolved_cfg:
primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "claude")
else:
primary = settings.primary_backend
# Slot not configured — fall through to auto routing
slot = None
if slot is None:
if model in _EXPLICIT_BACKENDS:
# Legacy: explicit backend override from old UI toggle
if model == "local":
resolved_cfg = _reg.get_best_local_model(username, role)
if not resolved_cfg:
raise RuntimeError("No local model configured — add one at /settings/models")
primary = model
else:
# Auto: role-based routing via model registry
resolved = _reg.get_model_for_role(username, role)
if resolved:
resolved_cfg = resolved
primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude")
else:
primary = settings.primary_backend
fallback = _FALLBACK.get(primary, "claude")
@@ -89,9 +102,7 @@ async def complete(
err_str = str(e)
if primary == "claude" and any(k in err_str for k in ("401", "authenticate", "expired", "OAuth")):
await event_bus.publish({"type": "claude_auth_expired"})
# Only fall back when using a default/auto backend.
# If the user has explicitly configured a model via the registry,
# surface the error so they know something is wrong.
# Surface errors when a model is explicitly configured or a specific slot was pinned.
if resolved_cfg is not None:
logger.error("%s failed (no fallback — model explicitly configured): %s", primary, e)
raise

View File

@@ -415,6 +415,23 @@ def get_best_local_model(username: str, role: str = "chat") -> dict | None:
return None
def get_model_for_slot(username: str, role: str, slot: str) -> dict | None:
    """
    Look up exactly one priority slot of a role, ignoring every other slot.

    Meant for explicit slot selection: the caller names one slot and gets that
    slot's model or nothing; no other slot of the role is consulted here.

    Returns None when ``slot`` is not one of PRIORITY_KEYS or when the role has
    no model id stored under that slot. Otherwise returns the result of
    ``_resolve_model`` for the stored id, which is expected to be None for an
    id that cannot be resolved.
    """
    if slot in PRIORITY_KEYS:
        registry = _load(username)
        role_slots = registry.get("roles", {}).get(role, {})
        model_id = role_slots.get(slot)
        if model_id:
            return _resolve_model(registry, model_id)
    return None
def get_google_api_key(username: str, account_id: str | None = None) -> str | None:
"""
Return the best available Gemini API key for the user.

View File

@@ -20,7 +20,7 @@ router = APIRouter()
def _backend_label(backend: str, username: str, role: str = "chat") -> str:
"""Human-readable label for the model that handled a request."""
"""Human-readable label for the model that handled a request (legacy path)."""
if backend == "claude":
return "Claude"
if backend == "gemini":
@@ -33,15 +33,24 @@ def _backend_label(backend: str, username: str, role: str = "chat") -> str:
return backend.title()
def _resolve_slot_label(username: str, slot: str) -> str | None:
    """Look up the display label of the model assigned to a "chat" role slot.

    Prefers the model's "label" and uses its "model_name" when the label is
    missing or empty. Returns None when the slot resolves to no model.
    """
    slot_cfg = model_registry.get_model_for_slot(username, "chat", slot)
    if not slot_cfg:
        return None
    return slot_cfg.get("label") or slot_cfg.get("model_name")
class ChatRequest(BaseModel):
message: str
session_id: str | None = None
tier: int | None = None
model: str | None = None # "claude" or "gemini" to override; None = use primary_backend
model: str | None = None # legacy backend override ("claude"|"gemini"|"local")
slot: str | None = None # Phase 3: role slot ("primary"|"backup_1"|"backup_2")
include_long: bool = True
include_mid: bool = True
include_short: bool = True
off_record: bool = False # skip session log (in-memory context preserved)
off_record: bool = False # skip session log (in-memory context preserved)
user: str = "scott"
persona: str = "inara"
@@ -94,6 +103,7 @@ async def _stream_chat(req: ChatRequest):
system_prompt=system_prompt,
messages=history,
model=req.model,
slot=req.slot,
))
try:
@@ -109,7 +119,11 @@ async def _stream_chat(req: ChatRequest):
try:
response_text, actual_backend = task.result()
backend_label = _backend_label(actual_backend, user, role="chat")
# Use the slot's model label when a slot was pinned; fall back to generic label
if req.slot:
backend_label = _resolve_slot_label(user, req.slot) or _backend_label(actual_backend, user)
else:
backend_label = _backend_label(actual_backend, user, role="chat")
host = platform.node()
history.append({
"role": "assistant",
@@ -164,28 +178,59 @@ _BACKEND_CYCLE = ("claude", "gemini", "local")
_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
def _request_user(request: Request) -> str | None:
    """Return the username carried by the request's auth cookie, or None.

    Reads the COOKIE_NAME cookie and hands it to ``decode_token``. A missing
    cookie, a token that ``decode_token`` rejects, or any other error while
    decoding all yield None.
    """
    try:
        token = request.cookies.get(COOKIE_NAME)
        return decode_token(token) if token else None
    except Exception:
        # Deliberately broad: every failure here means "no user". Naming
        # jwt.InvalidTokenError alongside Exception was redundant because it
        # is already an Exception subclass.
        return None
def _local_model_info(request: Request) -> dict | None:
    """Return the best local model {label, model_name} for the session user, or None.

    The user is taken from the request via ``_request_user``; when no user can
    be determined the result is None. The model is looked up for the "chat"
    role with ``model_registry.get_best_local_model``. Missing "label" or
    "model_name" entries are reported as empty strings.
    """
    username = _request_user(request)
    if not username:
        return None
    try:
        cfg = model_registry.get_best_local_model(username, "chat")
        if cfg:
            return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")}
    except Exception:
        # Best-effort hint only: a registry failure is reported as
        # "no local model" rather than propagated to the caller.
        pass
    return None
def _chat_models_for_toggle(username: str) -> list[dict]:
    """Build the list of selectable chat models for the UI toggle.

    Walks the first three entries of ``model_registry.PRIORITY_KEYS`` in order
    and, for each slot of the "chat" role that holds a model id which resolves
    to a model, emits ``{"slot", "label", "type"}``. The label falls back to
    the model name and then to the slot name; the type defaults to "".
    Slots that are empty or do not resolve are left out.
    """
    registry = model_registry.get_registry(username)
    chat_role = registry.get("roles", {}).get("chat", {})
    entries = []
    for slot in model_registry.PRIORITY_KEYS[:3]:
        model_id = chat_role.get(slot)
        resolved = model_registry._resolve_model(registry, model_id) if model_id else None
        if not resolved:
            continue
        label = resolved.get("label") or resolved.get("model_name") or slot
        entries.append({"slot": slot, "label": label, "type": resolved.get("type", "")})
    return entries
@router.get("/backend")
async def get_backend(request: Request) -> dict:
    """Describe the available backends for the UI.

    Returns a dict with:
      chat_models: [{slot, label, type}] for the request's user, or [] when
                   no user can be determined from the request.
      primary:     ``settings.primary_backend``.
      fallback:    the ``_BACKEND_FALLBACK`` entry for primary ("claude" when
                   there is none).
      local_model: result of ``_local_model_info`` for this request.
    """
    username = _request_user(request)
    chat_models = _chat_models_for_toggle(username) if username else []
    p = settings.primary_backend
    # Each key appears exactly once so _local_model_info runs only once per
    # request; repeated keys in a dict literal just overwrite each other.
    return {
        "chat_models": chat_models,
        # Legacy fields kept for backward compat
        "primary": p,
        "fallback": _BACKEND_FALLBACK.get(p, "claude"),
        "local_model": _local_model_info(request),
    }

View File

@@ -254,8 +254,8 @@
: 'Private note — only you see this…';
} else if (current_mode === 'agent') {
inputEl.placeholder = ctrlEnterMode
? `Task for ${personaLabel}… (Gemini tool loop — Ctrl+Enter to run)`
: `Task for ${personaLabel}… (Gemini tool loop)`;
? `Task for ${personaLabel}… (orchestrator — Ctrl+Enter to run)`
: `Task for ${personaLabel}… (orchestrator)`;
} else if (current_mode === 'otr') {
inputEl.placeholder = 'Off the record — not logged or distilled…';
} else {
@@ -340,58 +340,48 @@
}
// ── Backend toggle ───────────────────────────────────────────
// null = "auto" — uses role-based routing from model registry
// 'claude' / 'gemini' / 'local' = explicit override
// Phase 3: cycles through the chat role's configured models by label.
// Sends slot ("primary"|"backup_1"|"backup_2") in chat requests.
// Falls back to legacy "auto" behavior when no models are configured.
// On load only fetch local_model hint; don't override primaryBackend default (null)
fetch('/backend').then(r => r.json()).then(d => {
if (backendModelHint && d.local_model) {
// Pre-fill hint in case user is already in local mode
backendModelHint.textContent = d.local_model.label || d.local_model.model_name;
}
});
const BACKEND_CYCLE = [null, 'claude', 'gemini', 'local'];
const BACKEND_CLASS = { claude: '', gemini: 'mem-on', local: 'local-on' };
const TYPE_CLASS = { claude_cli: '', gemini_api: 'mem-on', gemini_cli: 'mem-on', local_openai: 'local-on' };
const backendModelHint = document.getElementById('backend-model-hint');
function setBackendUI(backend, localModel) {
primaryBackend = backend;
backendToggle.textContent = backend === null ? 'auto' : backend;
const extra = backend === null ? '' : (BACKEND_CLASS[backend] || '');
backendToggle.className = 'ctx-btn' + (extra ? ' ' + extra : '');
let chatSlots = []; // [{slot, label, type}] from /backend
let slotIdx = 0; // index into chatSlots; -1 = auto (no registry models)
function activeSlot() {
  // Entry of chatSlots at the current slotIdx, or null while chatSlots is
  // empty (nothing loaded from /backend).
  if (chatSlots.length === 0) {
    return null;
  }
  return chatSlots[slotIdx];
}
function setToggleUI(entry) {
  // Render the backend toggle for one chat model entry ({slot, label, type}).
  // A missing entry shows the plain "auto" state.
  if (!entry) {
    backendToggle.textContent = 'auto';
    backendToggle.className = 'ctx-btn';
    primaryBackend = null;
  } else {
    backendToggle.textContent = entry.label;
    backendToggle.className = 'ctx-btn ' + (TYPE_CLASS[entry.type] || '');
    // Mirrors the selected slot for any remaining readers of primaryBackend;
    // the chat payload itself takes the slot from activeSlot().
    primaryBackend = entry.slot;
  }
  // The hint element is always cleared and hidden here. The old
  // backend/localModel branch referenced names this function does not have.
  if (backendModelHint) {
    backendModelHint.textContent = '';
    backendModelHint.style.display = 'none';
  }
}
// Initialize to auto mode
setBackendUI(null, null);
fetch('/backend').then(r => r.json()).then(d => {
chatSlots = d.chat_models || [];
slotIdx = 0;
setToggleUI(chatSlots[0] || null);
});
backendToggle.addEventListener('click', async () => {
const idx = BACKEND_CYCLE.indexOf(primaryBackend);
const next = BACKEND_CYCLE[(idx + 1) % BACKEND_CYCLE.length];
if (next === null) {
// Auto: role-based routing — no server call needed
setBackendUI(null, null);
addMessage('system', 'Backend: auto (role-based routing)');
} else {
const res = await fetch('/backend', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ primary: next }),
});
const d = await res.json();
setBackendUI(next, d.local_model);
addMessage('system', `Backend: ${next} (fallback: ${d.fallback})`);
}
backendToggle.addEventListener('click', () => {
if (chatSlots.length === 0) return;
slotIdx = (slotIdx + 1) % chatSlots.length;
const entry = chatSlots[slotIdx];
setToggleUI(entry);
addMessage('system', `Backend: ${entry.label}`);
});
// ── Sessions panel ───────────────────────────────────────────
@@ -1066,7 +1056,7 @@
include_mid: memMid,
include_short: memShort,
off_record: current_mode === 'otr',
model: primaryBackend,
slot: activeSlot()?.slot || null,
user: CORTEX_USER,
persona: CORTEX_PERSONA,
};