diff --git a/cortex/llm_client.py b/cortex/llm_client.py index 6b140e6..28c19d9 100644 --- a/cortex/llm_client.py +++ b/cortex/llm_client.py @@ -49,14 +49,17 @@ async def complete( messages: list[dict], model: str | None = None, role: str = "chat", + slot: str | None = None, max_tokens: int = 2048, ) -> tuple[str, str]: """ Returns (response_text, actual_backend_used). - model: explicit backend override ("claude" | "gemini" | "local") from UI toggle. + slot: Phase 3 — specific role slot ("primary" | "backup_1" | "backup_2"). + Resolves only that slot (no backup-chain walk); if it is unset or unresolvable, routing falls through to model/auto below. Takes priority over model. + model: legacy backend override ("claude" | "gemini" | "local") from old toggle. None = resolve via model registry for the given role. - role: registry role used when model is None (default: "chat"). + role: registry role used for slot/auto routing (default: "chat"). """ import model_registry as _reg from persona import _user @@ -64,21 +67,31 @@ async def complete( username = _user.get() resolved_cfg: dict | None = None - if model in _EXPLICIT_BACKENDS: - # User explicitly selected a backend in the UI - if model == "local": - resolved_cfg = _reg.get_best_local_model(username, role) - if not resolved_cfg: - raise RuntimeError("No local model configured — add one at /settings/models") - primary = model - else: - # Role-based routing via model registry - resolved = _reg.get_model_for_role(username, role) - if resolved: - resolved_cfg = resolved - primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude") + if slot is not None: + # Phase 3: explicit slot selection — no fallback within the role + resolved_cfg = _reg.get_model_for_slot(username, role, slot) + if resolved_cfg: + primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "claude") else: - primary = settings.primary_backend + # Slot not configured — fall through to auto routing + slot = None + + if slot is None: + if model in _EXPLICIT_BACKENDS: + # Legacy: explicit backend override from old UI toggle + if model == 
"local": + resolved_cfg = _reg.get_best_local_model(username, role) + if not resolved_cfg: + raise RuntimeError("No local model configured — add one at /settings/models") + primary = model + else: + # Auto: role-based routing via model registry + resolved = _reg.get_model_for_role(username, role) + if resolved: + resolved_cfg = resolved + primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude") + else: + primary = settings.primary_backend fallback = _FALLBACK.get(primary, "claude") @@ -89,9 +102,7 @@ async def complete( err_str = str(e) if primary == "claude" and any(k in err_str for k in ("401", "authenticate", "expired", "OAuth")): await event_bus.publish({"type": "claude_auth_expired"}) - # Only fall back when using a default/auto backend. - # If the user has explicitly configured a model via the registry, - # surface the error so they know something is wrong. + # Surface errors when a model is explicitly configured or a specific slot was pinned. if resolved_cfg is not None: logger.error("%s failed (no fallback — model explicitly configured): %s", primary, e) raise diff --git a/cortex/model_registry.py b/cortex/model_registry.py index 1f1f11e..22435af 100644 --- a/cortex/model_registry.py +++ b/cortex/model_registry.py @@ -415,6 +415,23 @@ def get_best_local_model(username: str, role: str = "chat") -> dict | None: return None +def get_model_for_slot(username: str, role: str, slot: str) -> dict | None: + """ + Resolve a single named priority slot from a role without walking the fallback chain. + + Used by Phase 3 explicit slot selection — the user has pinned a specific model; + don't silently redirect to another slot if this one is empty or broken. + Returns None if the slot is unset or the model can't be resolved. 
+ """ + if slot not in PRIORITY_KEYS: + return None + registry = _load(username) + model_id = registry.get("roles", {}).get(role, {}).get(slot) + if not model_id: + return None + return _resolve_model(registry, model_id) + + def get_google_api_key(username: str, account_id: str | None = None) -> str | None: """ Return the best available Gemini API key for the user. diff --git a/cortex/routers/chat.py b/cortex/routers/chat.py index 6c14173..0284d1d 100644 --- a/cortex/routers/chat.py +++ b/cortex/routers/chat.py @@ -20,7 +20,7 @@ router = APIRouter() def _backend_label(backend: str, username: str, role: str = "chat") -> str: - """Human-readable label for the model that handled a request.""" + """Human-readable label for the model that handled a request (legacy path).""" if backend == "claude": return "Claude" if backend == "gemini": @@ -33,15 +33,24 @@ def _backend_label(backend: str, username: str, role: str = "chat") -> str: return backend.title() +def _resolve_slot_label(username: str, slot: str) -> str | None: + """Return the configured model label for a chat role slot, or None.""" + cfg = model_registry.get_model_for_slot(username, "chat", slot) + if cfg: + return cfg.get("label") or cfg.get("model_name") + return None + + class ChatRequest(BaseModel): message: str session_id: str | None = None tier: int | None = None - model: str | None = None # "claude" or "gemini" to override; None = use primary_backend + model: str | None = None # legacy backend override ("claude"|"gemini"|"local") + slot: str | None = None # Phase 3: role slot ("primary"|"backup_1"|"backup_2") include_long: bool = True include_mid: bool = True include_short: bool = True - off_record: bool = False # skip session log (in-memory context preserved) + off_record: bool = False # skip session log (in-memory context preserved) user: str = "scott" persona: str = "inara" @@ -94,6 +103,7 @@ async def _stream_chat(req: ChatRequest): system_prompt=system_prompt, messages=history, model=req.model, + 
slot=req.slot, )) try: @@ -109,7 +119,11 @@ async def _stream_chat(req: ChatRequest): try: response_text, actual_backend = task.result() - backend_label = _backend_label(actual_backend, user, role="chat") + # Use the slot's model label when a slot was pinned; fall back to generic label + if req.slot: + backend_label = _resolve_slot_label(user, req.slot) or _backend_label(actual_backend, user) + else: + backend_label = _backend_label(actual_backend, user, role="chat") host = platform.node() history.append({ "role": "assistant", @@ -164,28 +178,59 @@ _BACKEND_CYCLE = ("claude", "gemini", "local") _BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"} +def _request_user(request: Request) -> str | None: + """Extract username from JWT cookie, or None.""" + try: + token = request.cookies.get(COOKIE_NAME) + return decode_token(token) if token else None + except (jwt.InvalidTokenError, Exception): + return None + + def _local_model_info(request: Request) -> dict | None: """Return the best local model {label, model_name} for the session user, or None.""" + username = _request_user(request) + if not username: + return None try: - token = request.cookies.get(COOKIE_NAME) - username = decode_token(token) if token else None - if not username: - return None cfg = model_registry.get_best_local_model(username, "chat") if cfg: return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")} - except (jwt.InvalidTokenError, Exception): + except Exception: pass return None +def _chat_models_for_toggle(username: str) -> list[dict]: + """Return non-empty chat role slots as [{slot, label, type}] for the UI toggle.""" + registry = model_registry.get_registry(username) + role_cfg = registry.get("roles", {}).get("chat", {}) + result = [] + for slot in model_registry.PRIORITY_KEYS[:3]: + model_id = role_cfg.get(slot) + if not model_id: + continue + resolved = model_registry._resolve_model(registry, model_id) + if resolved: + result.append({ + "slot": 
slot, + "label": resolved.get("label") or resolved.get("model_name") or slot, + "type": resolved.get("type", ""), + }) + return result + + @router.get("/backend") async def get_backend(request: Request) -> dict: + username = _request_user(request) + chat_models = _chat_models_for_toggle(username) if username else [] p = settings.primary_backend return { - "primary": p, - "fallback": _BACKEND_FALLBACK.get(p, "claude"), - "local_model": _local_model_info(request), + "chat_models": chat_models, + # Legacy fields kept for backward compat + "primary": p, + "fallback": _BACKEND_FALLBACK.get(p, "claude"), + "local_model": _local_model_info(request), } diff --git a/cortex/static/app.js b/cortex/static/app.js index 939337b..b8b7f81 100644 --- a/cortex/static/app.js +++ b/cortex/static/app.js @@ -254,8 +254,8 @@ : 'Private note — only you see this…'; } else if (current_mode === 'agent') { inputEl.placeholder = ctrlEnterMode - ? `Task for ${personaLabel}… (Gemini tool loop — Ctrl+Enter to run)` - : `Task for ${personaLabel}… (Gemini tool loop)`; + ? `Task for ${personaLabel}… (orchestrator — Ctrl+Enter to run)` + : `Task for ${personaLabel}… (orchestrator)`; } else if (current_mode === 'otr') { inputEl.placeholder = 'Off the record — not logged or distilled…'; } else { @@ -340,58 +340,48 @@ } // ── Backend toggle ─────────────────────────────────────────── - // null = "auto" — uses role-based routing from model registry - // 'claude' / 'gemini' / 'local' = explicit override + // Phase 3: cycles through the chat role's configured models by label. + // Sends slot ("primary"|"backup_1"|"backup_2") in chat requests. + // Falls back to legacy "auto" behavior when no models are configured. 
- // On load only fetch local_model hint; don't override primaryBackend default (null) - fetch('/backend').then(r => r.json()).then(d => { - if (backendModelHint && d.local_model) { - // Pre-fill hint in case user is already in local mode - backendModelHint.textContent = d.local_model.label || d.local_model.model_name; - } - }); - - const BACKEND_CYCLE = [null, 'claude', 'gemini', 'local']; - const BACKEND_CLASS = { claude: '', gemini: 'mem-on', local: 'local-on' }; + const TYPE_CLASS = { claude_cli: '', gemini_api: 'mem-on', gemini_cli: 'mem-on', local_openai: 'local-on' }; const backendModelHint = document.getElementById('backend-model-hint'); - function setBackendUI(backend, localModel) { - primaryBackend = backend; - backendToggle.textContent = backend === null ? 'auto' : backend; - const extra = backend === null ? '' : (BACKEND_CLASS[backend] || ''); - backendToggle.className = 'ctx-btn' + (extra ? ' ' + extra : ''); + let chatSlots = []; // [{slot, label, type}] from /backend + let slotIdx = 0; // index into chatSlots; unused while chatSlots is empty (auto mode) + function activeSlot() { + return chatSlots.length > 0 ? 
chatSlots[slotIdx] : null; + } + + function setToggleUI(entry) { + if (!entry) { + backendToggle.textContent = 'auto'; + backendToggle.className = 'ctx-btn'; + primaryBackend = null; + } else { + backendToggle.textContent = entry.label; + backendToggle.className = 'ctx-btn ' + (TYPE_CLASS[entry.type] || ''); + primaryBackend = entry.slot; // used as legacy compat in payload + } if (backendModelHint) { - if (backend === 'local' && localModel) { - backendModelHint.textContent = localModel.label || localModel.model_name; - backendModelHint.style.display = ''; - } else { - backendModelHint.textContent = ''; - backendModelHint.style.display = 'none'; - } + backendModelHint.textContent = ''; + backendModelHint.style.display = 'none'; } } - // Initialize to auto mode - setBackendUI(null, null); + fetch('/backend').then(r => r.json()).then(d => { + chatSlots = d.chat_models || []; + slotIdx = 0; + setToggleUI(chatSlots[0] || null); + }); - backendToggle.addEventListener('click', async () => { - const idx = BACKEND_CYCLE.indexOf(primaryBackend); - const next = BACKEND_CYCLE[(idx + 1) % BACKEND_CYCLE.length]; - if (next === null) { - // Auto: role-based routing — no server call needed - setBackendUI(null, null); - addMessage('system', 'Backend: auto (role-based routing)'); - } else { - const res = await fetch('/backend', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ primary: next }), - }); - const d = await res.json(); - setBackendUI(next, d.local_model); - addMessage('system', `Backend: ${next} (fallback: ${d.fallback})`); - } + backendToggle.addEventListener('click', () => { + if (chatSlots.length === 0) return; + slotIdx = (slotIdx + 1) % chatSlots.length; + const entry = chatSlots[slotIdx]; + setToggleUI(entry); + addMessage('system', `Backend: ${entry.label}`); }); // ── Sessions panel ─────────────────────────────────────────── @@ -1066,7 +1056,7 @@ include_mid: memMid, include_short: memShort, off_record: current_mode 
=== 'otr', - model: primaryBackend, + slot: activeSlot()?.slot || null, user: CORTEX_USER, persona: CORTEX_PERSONA, }; diff --git a/documentation/DESIGN__Model_Registry_V2.md b/documentation/DESIGN__Model_Registry_V2.md index 088d469..b9f79a7 100644 --- a/documentation/DESIGN__Model_Registry_V2.md +++ b/documentation/DESIGN__Model_Registry_V2.md @@ -1,62 +1,71 @@ # Model Registry V2 — Design Document -> Status: Planning / Pre-implementation +> Status: Phase 3 in progress > Goal: Unified, provider-agnostic model management with clean role-based routing --- ## Problem Statement -The current system has two classes of models with different treatment: +The original system had two classes of models with different treatment: | Type | How configured | How selected | |---|---|---| | Claude, Gemini | Hardcoded built-ins (`claude_cli`, `gemini_api`) | Backend toggle string ("claude"/"gemini") | | Local (Ollama, Open WebUI) | Configured via `/settings/local` | Backend toggle string "local" | -This breaks down when you want: -- Multiple Gemini API keys (e.g. one per Google account) -- Claude via direct API key instead of OAuth CLI -- OpenRouter or other hosted providers alongside local models -- Role assignments to span all provider types uniformly -- A chat toggle that shows "which model" not "which service" +This breaks down when you want multiple Gemini API keys, OpenRouter alongside local models, +role assignments spanning all provider types, or a toggle that shows which model is active +instead of which service. 
--- -## Proposed Architecture +## Architecture ### Core concept: Providers + Credentials + Models + Roles ``` Providers (built-in, fixed set) - └─ Anthropic ← has a catalog of Claude model IDs - └─ Google ← has a catalog of Gemini model IDs + └─ Anthropic ← catalog of Claude model IDs (code constants) + └─ Google ← catalog of Gemini model IDs (code constants) └─ Local Host ← OpenAI-compatible endpoint (user adds these) -Credentials (user-configured, per provider) - └─ Anthropic ← Claude CLI (OAuth, default) or API key +Credentials (user-configured, stored in model_registry.json) + └─ Anthropic ← Claude CLI (OAuth, default) — API key support in Phase 4 └─ Google ← one or more API keys (one per Google account) - └─ Local Host ← api_key stored on the host record (existing) + └─ Local Host ← api_key stored on the host record -Model Entries (user-registered — "I want to use this model") +Model Entries (user-registered) └─ Provider + model ID + credential = one usable model entry - └─ Same model ID with two different accounts = two model entries Role Assignments (unified — any model entry can fill any role) - └─ chat: primary → backup_1 → backup_2 + └─ chat: primary → backup_1 → backup_2 └─ orchestrator: primary → backup_1 - └─ distill: primary + └─ distill: primary └─ (etc.) ``` -### Backend toggle redesign +### Catalog design decision -**Current:** cycles service type strings — `auto → claude → gemini → local` -**New:** cycles through the chat role's assigned models — `Primary → Backup 1 → Backup 2` +Catalogs (`ANTHROPIC_CATALOG`, `GOOGLE_CATALOG`) are **Python constants** in +`model_registry.py`, not stored in the per-user JSON. Updated with each code deploy. +Per-user catalog customisation is deferred to Phase 4. -The toggle displays the active model's label (e.g. "Sonnet 4.6" / "Gemini 2.5 Flash" / "Gemma 4 E4B"). Auto defaults to Primary. 
+### Backend toggle redesign (Phase 3) -This means the toggle is context-free — it just picks a slot — and all the "what model, what provider, what credentials" logic lives in the registry. +**Before:** cycles service type strings — `auto → claude → gemini → local` + +**After:** cycles through the chat role's configured models by label: +``` +Sonnet 4.6 (CLI) → Gemini 2.5 Flash → Gemma 4 E4B → (wraps) +``` +- Shows the resolved model label on the toggle button +- If no chat role models are configured: shows "auto", uses existing role routing +- Click skips empty slots automatically +- Color: `claude_cli` = default, `gemini_*` = blue, `local_openai` = amber + +UI sends `slot: "primary" | "backup_1" | "backup_2"` (not backend type string). +`llm_client.complete()` resolves that slot from the chat role and dispatches by `type`. --- @@ -67,74 +76,22 @@ Stored in `home/{user}/model_registry.json`. ```json { "version": 2, - "providers": { "anthropic": { - "catalog": [ - {"id": "claude-opus-4-7", "label": "Claude Opus 4.7", "context_k": 200}, - {"id": "claude-sonnet-4-6", "label": "Claude Sonnet 4.6", "context_k": 200}, - {"id": "claude-haiku-4-5", "label": "Claude Haiku 4.5", "context_k": 200} - ], - "credentials": [ - {"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"} - ] + "credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}] }, "google": { - "catalog": [ - {"id": "gemini-2.5-pro", "label": "Gemini 2.5 Pro", "context_k": 1000}, - {"id": "gemini-2.5-flash", "label": "Gemini 2.5 Flash", "context_k": 1000}, - {"id": "gemini-2.0-flash", "label": "Gemini 2.0 Flash", "context_k": 1000}, - {"id": "gemini-1.5-pro", "label": "Gemini 1.5 Pro", "context_k": 2000} - ], - "accounts": [ - {"id": "osit", "label": "One Sky IT (scott.idem@oneskyit.com)", "api_key": "AIza..."} - ] + "accounts": [{"id": "a1b2", "label": "One Sky IT", "api_key": "AIza..."}] } }, - "hosts": [ - { - "id": "h1", - "label": "Gaming Laptop", - "api_url": 
"http://192.168.x.x:3000", - "api_key": "", - "host_type": "openwebui" - } + {"id": "h1", "label": "Gaming Laptop", "api_url": "http://...", "api_key": "", "host_type": "openwebui"} ], - "models": [ - { - "id": "m1", - "label": "Sonnet 4.6 (CLI)", - "type": "claude_cli", - "provider": "anthropic", - "model_name": "claude-sonnet-4-6", - "credential_id": "cli", - "context_k": 200, - "tags": ["chat", "persona"] - }, - { - "id": "m2", - "label": "Gemini 2.5 Flash (OSIT)", - "type": "gemini_api", - "provider": "google", - "model_name": "gemini-2.5-flash", - "account_id": "osit", - "context_k": 1000, - "tags": ["orchestrator", "research"] - }, - { - "id": "m3", - "label": "Gemma 4 E4B", - "type": "local_openai", - "provider": "local", - "host_id": "h1", - "model_name": "gemma4:e4b", - "context_k": 72, - "tags": ["fast", "local"] - } + {"id": "m1", "type": "claude_cli", "label": "Sonnet 4.6 (CLI)", "model_name": "claude-sonnet-4-6", "provider": "anthropic", "credential_id": "cli", "context_k": 1000, "tags": []}, + {"id": "m2", "type": "gemini_api", "label": "Gemini 2.5 Flash", "model_name": "gemini-2.5-flash", "provider": "google", "account_id": "a1b2", "context_k": 1000, "tags": []}, + {"id": "m3", "type": "local_openai", "label": "Gemma 4 E4B", "model_name": "gemma4:e4b", "provider": "local", "host_id": "h1", "context_k": 72, "tags": []} ], - "roles": { "chat": {"primary": "m1", "backup_1": "m2", "backup_2": "m3"}, "orchestrator":{"primary": "m2", "backup_1": "m3"}, @@ -143,168 +100,100 @@ Stored in `home/{user}/model_registry.json`. 
} ``` -### Key differences from V1 +### Model types and dispatch -| V1 | V2 | -|---|---| -| Built-ins (`claude_cli`, `gemini_api`) are hardcoded constants | All models are registry entries — built-ins become auto-populated defaults | -| Single Gemini API key in `auth.json` | `providers.google.accounts[]` — list of accounts | -| Role assignments only work with local models in UI | All models in all roles | -| Host list only for local | Host list stays for local; `providers` section for cloud | -| `type` field existed but only `local_openai` was user-configurable | `type` fully determines dispatch for all models | +| `type` | Dispatches via | Notes | +|---|---|---| +| `claude_cli` | Claude CLI subprocess | `~/.claude/.credentials.json` OAuth | +| `gemini_cli` | Gemini CLI subprocess | | +| `gemini_api` | Currently: Gemini CLI (gap — see Phase 4) | Should use google-genai SDK | +| `local_openai` | HTTP to OpenAI-compatible endpoint | host_type controls path | + +### Built-in model IDs + +Always resolvable without a registry entry (used as `.env` role defaults): +`claude_cli`, `gemini_cli`, `gemini_api` --- -## Resolution Logic (updated) +## Resolution Logic -`get_model_for_role(username, role)` stays the same interface. Internally: +`get_model_for_role(username, role)` — walks `primary → backup_1 → backup_2 → backup_3 → backup_4`, returns first resolved model config with credentials merged in. Falls back to `.env` defaults, then hardcoded last-resort. -1. Walk `roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4` -2. For each slot: resolve the model entry → merge in credentials -3. 
If no registry entry for a role: fall back to `.env` defaults, then hardcoded - -`_resolve_model(registry, model_id)` gains new merge cases: -- `type == "claude_cli"` → merge in credential from `providers.anthropic.credentials` -- `type == "gemini_api"` → merge in `api_key` from `providers.google.accounts[account_id]` -- `type == "local_openai"` → merge host fields (existing logic, unchanged) - -### Backend toggle → dispatch - -``` -UI sends: slot = "primary" | "backup_1" | "backup_2" | null (auto) -``` - -`llm_client.complete()` resolves the slot against the `chat` role, gets a full model config, dispatches by `type`. No more `"claude"/"gemini"/"local"` string matching. +`get_model_for_slot(username, role, slot)` — resolves *only* the named slot, no fallback chain. Used by Phase 3 explicit slot selection. --- -## Routing Code Changes +## Routing Code -### `llm_client.complete()` -- **Remove:** `model: str | None` → service type string -- **Add:** `slot: str | None = None` → role slot override ("primary"/"backup_1"/etc.) 
-- Dispatch table: `type` → handler - - `claude_cli` → `_claude()` (unchanged) - - `claude_api` → `_claude_api()` (new, direct Anthropic API — future phase) - - `gemini_cli` → `_gemini()` (unchanged) - - `gemini_api` → `_gemini_api()` (new, replaces current hardcoded gemini_api built-in) - - `local_openai` → `_local()` (unchanged) +### `llm_client.complete()` (Phase 3 update) -### `orchestrator_engine.py` / `openai_orchestrator.py` -- Get orchestrator model via `get_model_for_role(username, "orchestrator")` -- Already works — `openai_orchestrator.py` runs when type is `local_openai` -- `orchestrator_engine.py` (Gemini) runs when type is `gemini_api` +``` +slot: str | None → resolve that exact slot, no backup-chain walk; an unset slot falls through to the routes below +model: str | None → legacy backend strings, kept for backward compat +(neither) → auto: role-based routing with full fallback chain +``` -### Chat router (`routers/chat.py`) -- Accept `slot` instead of `model` from UI -- Pass to `llm_client.complete(slot=slot)` +Dispatch table (`type` → backend function): +- `claude_cli` → `_claude()` +- `gemini_cli` → `_gemini()` +- `gemini_api` → `_gemini()` ← **gap: should be `_gemini_api()` (Phase 4)** +- `local_openai` → `_local()` + +### `routers/chat.py` (Phase 3 update) + +- `ChatRequest` gets `slot: str | None = None` +- `GET /backend` returns `chat_models: [{slot, label, type}]` for the UI toggle +- `_stream_chat` resolves model label from slot when `req.slot` is set + +### `app.js` (Phase 3 update) + +- Loads `chat_models` from `GET /backend` on page init +- Toggle cycles through `chat_models` by label, sends `slot` in chat payload +- Agent mode placeholder: remove "Gemini tool loop" hardcode → "orchestrator" --- -## Settings UI Redesign +## Known Gaps (not yet implemented) -### New page structure +### Gap A — `gemini_api` dispatch in `llm_client` (Phase 4) +`_TYPE_TO_BACKEND` maps `gemini_api → "gemini"` (CLI subprocess). 
If a user assigns a +`gemini_api` type model to the `chat` role, it silently routes to the Gemini CLI instead +of the Google genai SDK. Fix: add `_gemini_api()` in `llm_client.py` that calls the SDK +directly, matching how `orchestrator_engine.py` does it. Needs API key from resolved config. -``` -/settings/models ← unified model registry (replaces /settings/local) - Section 1: Cloud Providers - Anthropic - - credential: Claude CLI (OAuth) [default, always there] - - + Add API Key (future) - - model catalog [editable list of available Claude models] - Google - - accounts: [osit key ●●●●, + Add account] - - model catalog [editable list of available Gemini models] - Section 2: Local Hosts - [existing host cards, unchanged] - Section 3: Models - [unified list — all registered model entries across all providers] - + Add Model (provider picker first, then model + credential/account dropdowns) - -/settings/roles ← standalone page (or promoted to /settings/models bottom) - Role Assignments - chat: [primary ▾] [backup 1 ▾] [backup 2 ▾] - orchestrator: [primary ▾] [backup 1 ▾] - distill: [primary ▾] - (all dropdowns show all models from all providers) -``` - -### Backend toggle in chat UI - -Replace the `claude → gemini → local → auto` cycle with: - -``` -[Model label] ▾ (clickable cycles through chat role slots) -``` - -- Shows the label of the currently active chat model -- Click cycles: Primary → Backup 1 → Backup 2 → Primary -- Slots with no model assigned are skipped -- Color: same purple/amber/slate theme, based on provider type (optional) - ---- - -## Migration - -V1 → V2 is handled in `_load()`: - -1. Detect `version == 1` (or missing) -2. Synthesize `providers.anthropic` catalog from hardcoded defaults -3. Synthesize `providers.google` — migrate API key from `auth.json` as first account -4. Convert built-in role assignments (`claude_cli` / `gemini_api`) to new model entry IDs -5. Existing `hosts[]` and `local_openai` models carry over unchanged -6. 
Write `version: 2` and save - -No data loss. Old `local_llm.json` migration path still works (V0 → V1 → V2). +### Gap B — Agent mode placeholder (Phase 3, quick fix) +`app.js` lines 257–258 hard-code `"Gemini tool loop"`. Should say `"orchestrator"` since +the orchestrator role can now be a local model. --- ## Phases -### Phase 1 — Data model + backend routing (no UI changes yet) -- Extend schema to V2 in `model_registry.py` -- Migration from V1 on first load -- Update `_resolve_model()` to handle `gemini_api` + account lookup -- Update `llm_client.complete()` to accept `slot` parameter -- Update `routers/chat.py` to pass `slot` instead of backend string -- Keep backend toggle UI working (map old strings to slots temporarily) -- **Deliverable:** routing works with multi-account Gemini, no UI changes needed yet +### Phase 1 — Data model + routing ✅ 2026-04-27 +- V2 schema with `providers` section +- Auto migration V1→V2 (pulls gemini_api_key from auth.json → Google accounts) +- `_resolve_model()` merges account API key for `gemini_api` type +- `get_google_api_key()`, `save_cloud_model()`, `save/remove_google_account()` +- Orchestrator router uses model-resolved API key -### Phase 2 — Cloud provider UI -- Add Anthropic and Google sections to `/settings/local` (rename to `/settings/models`) -- Google accounts: add/remove API keys with labels -- Editable model catalog for Anthropic + Google (add/remove model IDs from the list) -- Model entry creation: provider picker → model dropdown (from catalog) → account/credential picker -- **Deliverable:** can register cloud models in the UI just like local models +### Phase 2 — Cloud provider UI ✅ 2026-04-27 +- `/settings/models` (canonical, `/settings/local` redirects) +- Cloud Providers section: Anthropic info + Google account add/remove +- Add Model form with provider tabs (Local / Google / Anthropic) +- Provider badges on model rows (Anthropic / Google / Local) +- Settings page updated: Gemini Key section replaced by Model 
Registry card -### Phase 3 — Unified role assignments + toggle redesign -- Promote role assignments to standalone `/settings/roles` page (or `/settings/models` bottom) -- All models from all providers appear in role selects -- Chat UI toggle: replace service-type cycle with slot cycle, show model label -- **Deliverable:** end-to-end unified experience +### Phase 3 — Toggle redesign + routing cleanup 🔄 in progress +- `model_registry.get_model_for_slot()` — resolve a specific slot without fallback chain +- `llm_client.complete()` — add `slot` parameter +- `routers/chat.py` — `ChatRequest.slot`, extend `GET /backend`, slot label in response tag +- `app.js` — data-driven toggle cycling model labels; send `slot` not backend string +- Fix Gap B: agent mode placeholder ### Phase 4 — Polish + future providers -- Claude direct API key support (optional, CLI is fine for now) -- OpenRouter as a named provider (already works as a "local" host with host_type=openai — could be promoted) -- Model catalog sync: fetch available models from Anthropic/Google API if keys are present +- Fix Gap A: `gemini_api` dispatch in `llm_client` → direct Google genai SDK for chat +- Claude direct API key support (alternative to CLI OAuth) +- OpenRouter as a named provider (already works as local host; could be promoted) - Per-role "test" button in role assignments UI - ---- - -## Open Questions - -1. **Claude direct API key:** Is this needed now, or is CLI OAuth sufficient for all users? - - Decision: CLI-only for Phase 1; add API key support in Phase 4 if needed - -2. **Catalog management:** Should the Anthropic/Google catalogs be server-wide defaults - that users can extend, or fully per-user? - - Recommendation: ship sensible defaults in code (updated with each deploy); - users can add custom entries if needed - -3. **Toggle UX:** Cycle through slot labels ("Primary / Backup 1 / Backup 2") or cycle - through model labels ("Sonnet 4.6 / Gemini 2.5 Flash / Gemma 4")? 
- - Model labels are more useful — clearer what you're switching to - -4. **Orchestrator mode toggle:** Does agent mode also respect the slot toggle, or is it - always "use orchestrator role"? - - Keep orchestrator role separate; the UI toggle only affects `chat` role +- Per-user catalog additions (extend ANTHROPIC_CATALOG / GOOGLE_CATALOG from UI)