feat: unified model registry with role-based routing

Introduces model_registry.py as the single source of truth for all LLM
backend configuration. Replaces backend settings previously scattered across
user_settings, the distill_backend_* .env variables, and the UI toggle.

model_registry.py:
- Per-user home/{user}/model_registry.json with version, hosts, models, roles
- Models have: type (local_openai|claude_cli|gemini_cli|gemini_api), label,
  model_name, host_id, context_k (tokens), tags (capability labels)
- Roles map to priority chains: primary, backup_1..backup_4
- Built-in IDs (claude_cli, gemini_cli, gemini_api) always resolvable
- Auto-migrates existing local_llm.json on first access
- CRUD: save_host, remove_host, save_model, remove_model, set_role
- get_model_for_role(): registry → .env default → hardcoded fallback

config.py:
- role_chat/orchestrator/distill/coder/research .env defaults
- defined_roles: comma-separated standard role list (extensible)
- get_defined_roles() and get_role_default() helper methods

llm_client.complete():
- New role= parameter (default "chat") for registry-based routing
- model= still accepted for explicit UI toggle override
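- _claude() accepts a model_cfg dict instead of a raw model string
- _local() gains an optional model_cfg parameter: uses the config pre-resolved
  by complete(), falling back to a direct registry lookup when none is passed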

memory_distiller.py:
- distill_mid/long now use role="distill" (no more distill_backend_* .env vars needed)

cron_runner.py:
- brief jobs use role="chat"

routers/chat.py + auth.py:
- Use model_registry instead of user_settings for local model info

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-04-05 21:25:18 -04:00
parent a4daebdc9b
commit 6a1a1c2686
7 changed files with 541 additions and 33 deletions

View File

@@ -31,7 +31,16 @@ async def cleanup() -> None:
_active_pgroups.clear()
_BACKENDS = ("claude", "gemini", "local")
# Map from registry model type → dispatch function key
_TYPE_TO_BACKEND = {
"claude_cli": "claude",
"gemini_cli": "gemini",
"gemini_api": "gemini", # gemini_api falls back to CLI in this context
"local_openai": "local",
}
# Explicit UI toggle values (kept for backward compat)
_EXPLICIT_BACKENDS = ("claude", "gemini", "local")
_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
@@ -39,18 +48,42 @@ async def complete(
system_prompt: str,
messages: list[dict],
model: str | None = None,
role: str = "chat",
max_tokens: int = 2048,
) -> tuple[str, str]:
"""Returns (response_text, actual_backend_used)."""
if model in _BACKENDS:
"""
Returns (response_text, actual_backend_used).
model: explicit backend override ("claude" | "gemini" | "local") from UI toggle.
None = resolve via model registry for the given role.
role: registry role used when model is None (default: "chat").
"""
import model_registry as _reg
from persona import _user
username = _user.get()
resolved_cfg: dict | None = None
if model in _EXPLICIT_BACKENDS:
# User explicitly selected a backend in the UI
if model == "local":
resolved_cfg = _reg.get_best_local_model(username, role)
if not resolved_cfg:
raise RuntimeError("No local model configured — add one at /settings/models")
primary = model
else:
primary = settings.primary_backend
# Role-based routing via model registry
resolved = _reg.get_model_for_role(username, role)
if resolved:
resolved_cfg = resolved
primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude")
else:
primary = settings.primary_backend
fallback = _FALLBACK.get(primary, "claude")
try:
response = await _dispatch(primary, system_prompt, messages, model)
response = await _dispatch(primary, system_prompt, messages, resolved_cfg)
return response, primary
except Exception as e:
err_str = str(e)
@@ -65,13 +98,13 @@ async def _dispatch(
backend: str,
system_prompt: str,
messages: list[dict],
model: str | None,
model_cfg: dict | None,
) -> str:
if backend == "gemini":
return await _gemini(system_prompt, messages)
if backend == "local":
return await _local(system_prompt, messages)
return await _claude(system_prompt, messages, model)
return await _local(system_prompt, messages, model_cfg)
return await _claude(system_prompt, messages, model_cfg)
def _fresh_claude_token() -> str | None:
@@ -91,14 +124,16 @@ def _fresh_claude_token() -> str | None:
return None
async def _claude(system_prompt: str, messages: list[dict], model: str | None) -> str:
async def _claude(system_prompt: str, messages: list[dict], model_cfg: dict | None) -> str:
model_name = (model_cfg or {}).get("model_name") if model_cfg else None
cmd = [
"claude", "--print",
"--no-session-persistence",
"--output-format", "text",
]
if model and model not in ("claude", "gemini"):
cmd.extend(["--model", model])
# Only pass --model if it's a real model name (not a backend type string)
if model_name and model_name not in ("claude", "gemini", "local", ""):
cmd.extend(["--model", model_name])
if system_prompt:
cmd.extend(["--system-prompt", system_prompt])
cmd.append(_build_conversation(messages))
@@ -114,19 +149,22 @@ async def _claude(system_prompt: str, messages: list[dict], model: str | None) -
return await _run(cmd, timeout=settings.timeout_claude, env=env)
async def _local(system_prompt: str, messages: list[dict]) -> str:
async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | None = None) -> str:
"""OpenAI-compatible backend — Open WebUI / Ollama.
Per-user config (home/{user}/local_llm.json) takes precedence over
the server-level .env defaults.
model_cfg is pre-resolved by complete() via model_registry.
Falls back to registry lookup if not provided.
"""
import httpx
from persona import _user
from user_settings import get_active_local_model
cfg = get_active_local_model(_user.get())
cfg = model_cfg
if not cfg:
raise RuntimeError("No local model configured — add one at /settings/local")
# Fallback: resolve directly from registry
import model_registry as _reg
from persona import _user
cfg = _reg.get_best_local_model(_user.get())
if not cfg:
raise RuntimeError("No local model configured — add one at /settings/models")
api_url = cfg["api_url"]
api_key = cfg["api_key"]