feat: model registry Phase 3 — slot-based backend toggle

Backend toggle now cycles through chat role models by label instead of
cycling service type strings (auto/claude/gemini/local).

- model_registry: get_model_for_slot() — resolves a specific priority
  slot without walking the fallback chain
- llm_client: complete() gains slot param; explicit slot selection
  dispatches directly to that model with no silent fallback
- routers/chat.py: ChatRequest.slot; GET /backend returns chat_models
  [{slot, label, type}] for the UI; _stream_chat uses resolved model
  label for the response tag when a slot is pinned
- app.js: toggle loads chat_models from /backend, cycles by label,
  sends slot in chat payload; legacy model field removed from payload
- app.js: fix Gap B — agent mode placeholder no longer says "Gemini
  tool loop"; now says "orchestrator"
- DESIGN doc: updated to reflect phases 1+2 complete, catalog-as-code
  decision, Gap A/B documented, Phase 3 implementation details

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-04-27 21:43:08 -04:00
parent 3bc6b45f9f
commit 962d58d2e2
5 changed files with 248 additions and 296 deletions

View File

@@ -20,7 +20,7 @@ router = APIRouter()
def _backend_label(backend: str, username: str, role: str = "chat") -> str:
"""Human-readable label for the model that handled a request."""
"""Human-readable label for the model that handled a request (legacy path)."""
if backend == "claude":
return "Claude"
if backend == "gemini":
@@ -33,15 +33,24 @@ def _backend_label(backend: str, username: str, role: str = "chat") -> str:
return backend.title()
def _resolve_slot_label(username: str, slot: str) -> str | None:
    """Look up the display label of the model configured in a chat role slot.

    The slot is resolved through ``model_registry.get_model_for_slot`` for
    the ``"chat"`` role. When a config is found, its ``"label"`` is returned,
    falling back to its ``"model_name"`` if the label is missing or empty.
    Returns ``None`` when the registry yields no config for the slot.
    """
    slot_cfg = model_registry.get_model_for_slot(username, "chat", slot)
    if not slot_cfg:
        return None
    label = slot_cfg.get("label")
    return label if label else slot_cfg.get("model_name")
class ChatRequest(BaseModel):
    """Request payload consumed by ``_stream_chat``.

    ``model`` and ``slot`` are both forwarded to the LLM client call; the
    response tag uses the slot's configured label whenever ``slot`` is set.
    Each field is declared exactly once so the class body has a single
    definition (and a single explanatory comment) per field.
    """

    message: str
    session_id: str | None = None
    tier: int | None = None
    model: str | None = None  # legacy backend override ("claude"|"gemini"|"local")
    slot: str | None = None  # Phase 3: role slot ("primary"|"backup_1"|"backup_2")
    include_long: bool = True
    include_mid: bool = True
    include_short: bool = True
    off_record: bool = False  # skip session log (in-memory context preserved)
    user: str = "scott"
    persona: str = "inara"
@@ -94,6 +103,7 @@ async def _stream_chat(req: ChatRequest):
system_prompt=system_prompt,
messages=history,
model=req.model,
slot=req.slot,
))
try:
@@ -109,7 +119,11 @@ async def _stream_chat(req: ChatRequest):
try:
response_text, actual_backend = task.result()
backend_label = _backend_label(actual_backend, user, role="chat")
# Use the slot's model label when a slot was pinned; fall back to generic label
if req.slot:
backend_label = _resolve_slot_label(user, req.slot) or _backend_label(actual_backend, user)
else:
backend_label = _backend_label(actual_backend, user, role="chat")
host = platform.node()
history.append({
"role": "assistant",
@@ -164,28 +178,59 @@ _BACKEND_CYCLE = ("claude", "gemini", "local")
# Maps a primary backend name to the backend reported as its "fallback" by
# GET /backend; unknown primaries default to "claude" at the lookup site.
_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
def _request_user(request: Request) -> str | None:
    """Extract the username from the JWT session cookie, or return None.

    Reads the cookie named ``COOKIE_NAME`` from ``request`` and passes it to
    ``decode_token``. Returns ``None`` when the cookie is absent or when
    reading/decoding raises any exception.
    """
    try:
        token = request.cookies.get(COOKIE_NAME)
        return decode_token(token) if token else None
    except Exception:
        # Deliberately broad: an invalid or expired token (including
        # jwt.InvalidTokenError, which is an Exception subclass) simply means
        # "no authenticated user" here rather than an error for the caller.
        return None
def _local_model_info(request: Request) -> dict | None:
    """Return the best local model {label, model_name} for the session user, or None.

    The username is taken from the session cookie via ``_request_user``; an
    unauthenticated request yields ``None``. The model config comes from
    ``model_registry.get_best_local_model`` for the ``"chat"`` role. Missing
    ``"label"`` / ``"model_name"`` entries are reported as empty strings.
    Any exception raised during the registry lookup is swallowed and
    ``None`` is returned.
    """
    username = _request_user(request)
    if not username:
        return None
    try:
        cfg = model_registry.get_best_local_model(username, "chat")
        if cfg:
            return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")}
    except Exception:
        # Best-effort lookup: this only feeds informational UI data, so a
        # registry failure degrades to "no local model" instead of an error.
        pass
    return None
def _chat_models_for_toggle(username: str) -> list[dict]:
    """Build the UI toggle entries for the user's populated chat role slots.

    Walks the first three entries of ``model_registry.PRIORITY_KEYS``
    (presumably primary / backup_1 / backup_2 -- confirm against the
    registry module) and, for every slot that names a model which the
    registry can resolve, emits ``{"slot", "label", "type"}``. The label
    falls back to the model name and then to the slot key itself; a missing
    type is reported as an empty string. Unset or unresolvable slots are
    omitted.
    """
    registry = model_registry.get_registry(username)
    chat_slots = registry.get("roles", {}).get("chat", {})
    toggle_entries = []
    for slot_key in model_registry.PRIORITY_KEYS[:3]:
        model_id = chat_slots.get(slot_key)
        # Only consult the registry for slots that actually name a model.
        model_cfg = model_registry._resolve_model(registry, model_id) if model_id else None
        if not model_cfg:
            continue
        display_label = model_cfg.get("label") or model_cfg.get("model_name") or slot_key
        entry = {
            "slot": slot_key,
            "label": display_label,
            "type": model_cfg.get("type", ""),
        }
        toggle_entries.append(entry)
    return toggle_entries
@router.get("/backend")
async def get_backend(request: Request) -> dict:
    """Describe the available chat backends for the UI.

    Returns a dict with:
      - ``chat_models``: the ``[{slot, label, type}]`` list from
        ``_chat_models_for_toggle`` for the session user, or ``[]`` when the
        request carries no valid session.
      - ``primary`` / ``fallback`` / ``local_model``: legacy fields derived
        from ``settings.primary_backend``, ``_BACKEND_FALLBACK`` and
        ``_local_model_info``.

    Every key appears exactly once in the literal, so ``_local_model_info``
    runs a single time per request.
    """
    username = _request_user(request)
    chat_models = _chat_models_for_toggle(username) if username else []
    p = settings.primary_backend
    return {
        "chat_models": chat_models,
        # Legacy fields kept for backward compat
        "primary": p,
        "fallback": _BACKEND_FALLBACK.get(p, "claude"),
        "local_model": _local_model_info(request),
    }