feat: unified model registry with role-based routing
Introduces model_registry.py as the single source of truth for all LLM
backend configuration. It replaces the backend settings that were previously
scattered across user_settings, the distill_backend_* config values, and the
UI toggle.
model_registry.py:
- Stored per user in home/{user}/model_registry.json, containing version, hosts, models, and roles
- Models have: type (local_openai|claude_cli|gemini_cli|gemini_api), label,
model_name, host_id, context_k (tokens), tags (capability labels)
- Roles map to priority chains: primary, backup_1..backup_4
- Built-in IDs (claude_cli, gemini_cli, gemini_api) always resolvable
- Auto-migrates existing local_llm.json on first access
- CRUD: save_host, remove_host, save_model, remove_model, set_role
- get_model_for_role() resolves in this order: registry → .env default → hardcoded fallback
config.py:
- role_chat/orchestrator/distill/coder/research .env defaults
- defined_roles: comma-separated standard role list (extensible)
- get_defined_roles() and get_role_default() helper methods
llm_client.complete():
- New role= parameter (default "chat") for registry-based routing
- model= still accepted for explicit UI toggle override
- _claude() and _local() now accept a model_cfg dict instead of a raw string
- _local() uses the pre-resolved config from the registry
memory_distiller.py:
- distill_mid/long now use role="distill", so the distill_backend_* .env vars are no longer needed
cron_runner.py:
- brief jobs use role="chat"
routers/chat.py + auth.py:
- Use model_registry instead of user_settings for local model info
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -72,21 +72,33 @@ def _gemini_status() -> dict:
|
||||
return {"ok": False, "error": str(e), "warning": True, "authenticated": False}
|
||||
|
||||
|
||||
async def _local_status() -> dict:
|
||||
if not settings.local_api_url:
|
||||
async def _local_status(username: str = "scott") -> dict:
|
||||
"""Check reachability of the user's configured local model host."""
|
||||
import model_registry
|
||||
cfg = model_registry.get_best_local_model(username)
|
||||
if not cfg:
|
||||
return {"configured": False}
|
||||
api_url = cfg.get("api_url", "")
|
||||
if not api_url:
|
||||
return {"configured": False}
|
||||
try:
|
||||
import httpx
|
||||
url = settings.local_api_url.rstrip("/") + "/api/models"
|
||||
url = api_url.rstrip("/") + "/api/models"
|
||||
headers = {}
|
||||
if settings.local_api_key:
|
||||
headers["Authorization"] = f"Bearer {settings.local_api_key}"
|
||||
api_key = cfg.get("api_key", "")
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
async with httpx.AsyncClient(timeout=5) as client:
|
||||
resp = await client.get(url, headers=headers)
|
||||
reachable = resp.status_code < 400
|
||||
return {"configured": True, "reachable": reachable, "model": settings.local_model}
|
||||
return {
|
||||
"configured": True,
|
||||
"reachable": reachable,
|
||||
"model": cfg.get("model_name", ""),
|
||||
"label": cfg.get("label", ""),
|
||||
}
|
||||
except Exception as e:
|
||||
return {"configured": True, "reachable": False, "error": str(e), "model": settings.local_model}
|
||||
return {"configured": True, "reachable": False, "error": str(e), "model": cfg.get("model_name", "")}
|
||||
|
||||
|
||||
@router.get("/status")
|
||||
|
||||
@@ -11,7 +11,7 @@ from session_store import load as load_session, save as save_session, list_all,
|
||||
from config import settings
|
||||
from persona import set_context, validate as validate_persona
|
||||
from auth_utils import COOKIE_NAME, decode_token
|
||||
import user_settings
|
||||
import model_registry
|
||||
import event_bus
|
||||
|
||||
|
||||
@@ -138,15 +138,15 @@ _BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
|
||||
|
||||
|
||||
def _local_model_info(request: Request) -> dict | None:
|
||||
"""Return active local model {label, model_name} for the session user, or None."""
|
||||
"""Return the best local model {label, model_name} for the session user, or None."""
|
||||
try:
|
||||
token = request.cookies.get(COOKIE_NAME)
|
||||
username = decode_token(token) if token else None
|
||||
if not username:
|
||||
return None
|
||||
cfg = user_settings.get_active_local_model(username)
|
||||
cfg = model_registry.get_best_local_model(username, "chat")
|
||||
if cfg:
|
||||
return {"label": cfg["label"], "model_name": cfg["model_name"]}
|
||||
return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")}
|
||||
except (jwt.InvalidTokenError, Exception):
|
||||
pass
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user