feat: unified model registry with role-based routing

Introduces model_registry.py as the single source of truth for all LLM backend configuration. It replaces the backend settings previously scattered across user_settings, the config distill_backend_* variables, and the UI toggle.

model_registry.py:
- Per-user home/{user}/model_registry.json with version, hosts, models, roles
- Models have: type (local_openai|claude_cli|gemini_cli|gemini_api), label, model_name, host_id, context_k (tokens), tags (capability labels)
- Roles map to priority chains: primary, backup_1..backup_4
- Built-in IDs (claude_cli, gemini_cli, gemini_api) always resolvable
- Auto-migrates existing local_llm.json on first access
- CRUD: save_host, remove_host, save_model, remove_model, set_role
- get_model_for_role(): registry → .env default → hardcoded fallback

config.py:
- role_chat/orchestrator/distill/coder/research .env defaults
- defined_roles: comma-separated standard role list (extensible)
- get_defined_roles() and get_role_default() helper methods

llm_client.complete():
- New role= parameter (default "chat") for registry-based routing
- model= still accepted for explicit UI toggle override
- _claude() and _local() accept model_cfg dict instead of raw string
- _local() uses pre-resolved config from registry

memory_distiller.py:
- distill_mid/long now use role="distill" (no more distill_backend_* .env vars needed)

cron_runner.py:
- brief jobs use role="chat"

routers/chat.py + auth.py:
- Use model_registry instead of user_settings for local model info

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 21:25:18 -04:00
parent a4daebdc9b
commit 6a1a1c2686
7 changed files with 541 additions and 33 deletions
--- a/cortex/routers/auth.py
+++ b/cortex/routers/auth.py
@@ -72,21 +72,33 @@ def _gemini_status() -> dict:
        return {"ok": False, "error": str(e), "warning": True, "authenticated": False}


-async def _local_status() -> dict:
-    if not settings.local_api_url:
+async def _local_status(username: str = "scott") -> dict:
+    """Check reachability of the user's configured local model host."""
+    import model_registry
+    cfg = model_registry.get_best_local_model(username)
+    if not cfg:
+        return {"configured": False}
+    api_url = cfg.get("api_url", "")
+    if not api_url:
        return {"configured": False}
    try:
        import httpx
-        url = settings.local_api_url.rstrip("/") + "/api/models"
+        url = api_url.rstrip("/") + "/api/models"
        headers = {}
-        if settings.local_api_key:
-            headers["Authorization"] = f"Bearer {settings.local_api_key}"
+        api_key = cfg.get("api_key", "")
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
        async with httpx.AsyncClient(timeout=5) as client:
            resp = await client.get(url, headers=headers)
        reachable = resp.status_code < 400
-        return {"configured": True, "reachable": reachable, "model": settings.local_model}
+        return {
+            "configured": True,
+            "reachable": reachable,
+            "model": cfg.get("model_name", ""),
+            "label": cfg.get("label", ""),
+        }
    except Exception as e:
-        return {"configured": True, "reachable": False, "error": str(e), "model": settings.local_model}
+        return {"configured": True, "reachable": False, "error": str(e), "model": cfg.get("model_name", "")}


@router.get("/status")
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -11,7 +11,7 @@ from session_store import load as load_session, save as save_session, list_all,
 from config import settings
 from persona import set_context, validate as validate_persona
 from auth_utils import COOKIE_NAME, decode_token
-import user_settings
+import model_registry
 import event_bus


@@ -138,15 +138,15 @@ _BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}


 def _local_model_info(request: Request) -> dict | None:
-    """Return active local model {label, model_name} for the session user, or None."""
+    """Return the best local model {label, model_name} for the session user, or None."""
    try:
        token    = request.cookies.get(COOKIE_NAME)
        username = decode_token(token) if token else None
        if not username:
            return None
-        cfg = user_settings.get_active_local_model(username)
+        cfg = model_registry.get_best_local_model(username, "chat")
        if cfg:
-            return {"label": cfg["label"], "model_name": cfg["model_name"]}
+            return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")}
    except (jwt.InvalidTokenError, Exception):
        pass
    return None