diff --git a/cortex/model_registry.py b/cortex/model_registry.py index f2e7b02..5d38267 100644 --- a/cortex/model_registry.py +++ b/cortex/model_registry.py @@ -1,57 +1,72 @@ """ -Per-user unified model registry. +Per-user unified model registry — V2. Stored in: home/{user}/model_registry.json -Schema: +V2 Schema: { - "version": 1, - "hosts": [{"id", "label", "api_url", "api_key", - "host_type": "openwebui" | "openai"}, ...], - # - # host_type controls the API path layout: - # "openwebui" (default) — Open WebUI / Ollama: - # chat: POST {url}/api/chat/completions - # models: GET {url}/api/models - # "openai" — OpenRouter, LiteLLM, Anthropic-compatible, etc.: - # chat: POST {url}/chat/completions - # models: GET {url}/models - # Set api_url to the base path that ends just before /chat/completions, - # e.g. https://openrouter.ai/api/v1 for OpenRouter. + "version": 2, + + # Per-provider accounts / credentials (user-configured) + "providers": { + "anthropic": { + "credentials": [ + {"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"} + ] + }, + "google": { + "accounts": [ + {"id": "", "label": "My Google account", "api_key": "AIza..."} + ] + } + }, + + # Local OpenAI-compatible hosts (unchanged from V1) + "hosts": [{"id", "label", "api_url", "api_key", "host_type"}, ...], + + # User-registered model entries (all providers) "models": [ { - "id": str, # unique within this registry - "type": str, # "local_openai" | "claude_cli" | "gemini_cli" | "gemini_api" - "label": str, # human-readable display name - "model_name": str, # model identifier sent to the API - "host_id": str | null, # only for local_openai — references hosts[].id - "context_k": int, # context window in thousands of tokens (informational) - "tags": [str], # user-defined capability tags + "id": str, # unique within this registry + "type": str, # see TYPES below + "label": str, # human-readable + "model_name": str, # identifier sent to the API / CLI + "provider": str | null, # "anthropic" | "google" | 
"local" | null + "host_id": str | null, # local_openai only — references hosts[].id + "credential_id":str | null, # claude_cli only — references providers.anthropic.credentials + "account_id": str | null, # gemini_api only — references providers.google.accounts + "context_k": int, # context window in k tokens (informational) + "tags": [str], # user-defined capability tags }, ], + + # Role assignments — any model (any provider) can fill any role "roles": { "": { "primary": "" | null, "backup_1": "" | null, - "backup_2": "" | null, - "backup_3": "" | null, + ... "backup_4": "" | null, }, }, } -Built-in model IDs (always resolvable, no registry entry required): - "claude_cli" — Claude CLI subprocess (~/.claude/.credentials.json) - "gemini_cli" — Gemini CLI subprocess - "gemini_api" — Gemini API (google-genai SDK; used by orchestrator engine, not llm_client) +Types: + "claude_cli" — Claude CLI subprocess (~/.claude/.credentials.json) + "gemini_cli" — Gemini CLI subprocess + "gemini_api" — Gemini API (google-genai SDK); account_id → api_key from providers.google + "local_openai" — OpenAI-compatible endpoint; host_id → api_url/api_key from hosts[] -Standard roles are defined by settings.defined_roles (default: chat,orchestrator,distill,coder,research). -Additional custom roles can be added freely to roles{}. +Built-in model IDs (always resolvable without a registry entry): + "claude_cli" — resolves to the default Claude CLI model + "gemini_cli" — resolves to Gemini CLI + "gemini_api" — resolves to Gemini API using GEMINI_API_KEY from .env -Resolution for get_model_for_role(username, role): - 1. User registry: roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4 - 2. .env default: ROLE_= (e.g. ROLE_CHAT=claude_cli) +Role resolution for get_model_for_role(username, role): + 1. User registry: roles[role].primary → backup_1 → ... → backup_4 + 2. .env default: ROLE_= 3. Hardcoded last-resort defaults per role + 4. 
claude_cli (absolute fallback) """ import json @@ -63,11 +78,28 @@ from config import settings logger = logging.getLogger(__name__) + +# ── Provider model catalogs ─────────────────────────────────────────────────── +# Server-side defaults. Update here when providers release new models. +# Users can add entries via the settings UI (Phase 2). + +ANTHROPIC_CATALOG: list[dict] = [ + {"id": "claude-opus-4-7", "label": "Claude Opus 4.7", "context_k": 200}, + {"id": "claude-sonnet-4-6", "label": "Claude Sonnet 4.6", "context_k": 200}, + {"id": "claude-haiku-4-5-20251001", "label": "Claude Haiku 4.5", "context_k": 200}, +] + +GOOGLE_CATALOG: list[dict] = [ + {"id": "gemini-2.5-pro", "label": "Gemini 2.5 Pro", "context_k": 1000}, + {"id": "gemini-2.5-flash", "label": "Gemini 2.5 Flash", "context_k": 1000}, + {"id": "gemini-2.0-flash", "label": "Gemini 2.0 Flash", "context_k": 1000}, + {"id": "gemini-1.5-pro", "label": "Gemini 1.5 Pro", "context_k": 2000}, +] + + # ── Built-in model definitions ──────────────────────────────────────────────── -# These IDs are always resolvable without a registry entry. 
def _builtins() -> dict[str, dict]: - """Return built-in model definitions (lazy so settings are resolved at call time).""" return { "claude_cli": { "id": "claude_cli", @@ -96,7 +128,6 @@ def _builtins() -> dict[str, dict]: } -# Hardcoded last-resort defaults per role (used only if .env is also unset) _ROLE_LAST_RESORT: dict[str, str] = { "chat": "claude_cli", "orchestrator": "gemini_api", @@ -118,14 +149,40 @@ def _local_llm_path(username: str) -> Path: return settings.home_root() / username / "local_llm.json" +def _auth_path(username: str) -> Path: + return settings.home_root() / username / "auth.json" + + def _empty() -> dict: - return {"version": 1, "hosts": [], "models": [], "roles": {}} + return { + "version": 2, + "providers": _default_providers(), + "hosts": [], + "models": [], + "roles": {}, + } + + +def _default_providers() -> dict: + return { + "anthropic": { + "credentials": [ + {"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"} + ] + }, + "google": { + "accounts": [] + }, + } def _normalize(data: dict) -> dict: - """Back-fill any missing fields introduced by schema additions.""" + """Back-fill missing fields introduced by schema additions.""" for h in data.get("hosts", []): h.setdefault("host_type", "openwebui") + data.setdefault("providers", _default_providers()) + data["providers"].setdefault("anthropic", {"credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}]}) + data["providers"].setdefault("google", {"accounts": []}) return data @@ -135,12 +192,15 @@ def _load(username: str) -> dict: try: data = json.loads(path.read_text()) if isinstance(data, dict) and "version" in data: + if data["version"] == 1: + data = _migrate_v1_to_v2(username, data) + _save(username, data) return _normalize(data) except (json.JSONDecodeError, OSError): logger.warning("model_registry.json for %s is unreadable — starting fresh", username) return _empty() - # No registry yet — try migrating from local_llm.json + # No registry — try migrating 
from local_llm.json legacy = _local_llm_path(username) if legacy.exists(): data = _migrate_from_local_llm(username, legacy) @@ -157,8 +217,45 @@ def _save(username: str, data: dict) -> None: # ── Migration ───────────────────────────────────────────────────────────────── +def _migrate_v1_to_v2(username: str, data: dict) -> dict: + """ + Upgrade a V1 registry to V2. + + Changes: + - Adds providers section with default structure + - Migrates gemini_api_key from auth.json → first Google account entry + - Sets version to 2 + """ + logger.info("Migrating model_registry.json V1 → V2 for %s", username) + + data["version"] = 2 + if "providers" not in data: + data["providers"] = _default_providers() + else: + data["providers"].setdefault("anthropic", {"credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}]}) + data["providers"].setdefault("google", {"accounts": []}) + + # Pull existing Gemini key from auth.json (stored there in V1) → first account entry + accounts = data["providers"]["google"]["accounts"] + if not accounts: + try: + auth = json.loads(_auth_path(username).read_text()) + existing_key = auth.get("gemini_api_key") + if existing_key: + accounts.append({ + "id": secrets.token_hex(4), + "label": "Gemini API Key", + "api_key": existing_key, + }) + logger.info("Migrated gemini_api_key from auth.json → providers.google.accounts for %s", username) + except (OSError, json.JSONDecodeError): + pass + + return data + + def _migrate_from_local_llm(username: str, path: Path) -> dict: - """Convert local_llm.json (hosts/models/active_model_id) → model_registry format.""" + """Convert local_llm.json → V2 model_registry format.""" try: old = json.loads(path.read_text()) except Exception: @@ -190,30 +287,25 @@ def _migrate_from_local_llm(username: str, path: Path) -> dict: "type": "local_openai", "label": m.get("label") or m.get("model_name", ""), "model_name": m.get("model_name", ""), + "provider": "local", "host_id": m.get("host_id"), "context_k": 0, 
"tags": [], }) - # Build initial role assignments active_id = old.get("active_model_id") - distill_type = settings.distill_backend_mid or None - - roles: dict[str, dict] = {} if active_id and any(m["id"] == active_id for m in data["models"]): - roles["chat"] = {"primary": active_id} + data["roles"]["chat"] = {"primary": active_id} - if distill_type == "local" and active_id: - roles["distill"] = {"primary": active_id} - - data["roles"] = roles + # Migrate Gemini key from auth.json + data = _migrate_v1_to_v2(username, {"version": 1, **data}) return data # ── Model resolution ────────────────────────────────────────────────────────── def _resolve_model(registry: dict, model_id: str) -> dict | None: - """Resolve a model_id to its full config dict, or None if not found.""" + """Resolve a model_id to its full config dict (credentials merged in), or None.""" builtins = _builtins() # Built-in IDs take priority over user-defined entries with the same ID @@ -224,7 +316,9 @@ def _resolve_model(registry: dict, model_id: str) -> dict | None: if not model: return None - if model.get("type") == "local_openai": + model_type = model.get("type") + + if model_type == "local_openai": host_id = model.get("host_id") host = next((h for h in registry.get("hosts", []) if h["id"] == host_id), None) if not host: @@ -237,6 +331,19 @@ def _resolve_model(registry: dict, model_id: str) -> dict | None: "host_type": host.get("host_type", "openwebui"), } + if model_type == "gemini_api": + account_id = model.get("account_id") + if account_id: + accounts = registry.get("providers", {}).get("google", {}).get("accounts", []) + account = next((a for a in accounts if a["id"] == account_id), None) + if account: + return {**model, "api_key": account.get("api_key", "")} + logger.warning("model %s references missing account_id %s", model_id, account_id) + return dict(model) + + if model_type == "claude_cli": + return dict(model) + return dict(model) @@ -277,7 +384,6 @@ def get_best_local_model(username: str, 
role: str = "chat") -> dict | None: """ Return the best available local_openai model for the given role. Used when the user explicitly selects "local" backend in the UI. - Tries the role's priority chain first, then any configured local model. """ registry = _load(username) role_cfg = registry.get("roles", {}).get(role, {}) @@ -290,7 +396,6 @@ def get_best_local_model(username: str, role: str = "chat") -> dict | None: if resolved and resolved.get("type") == "local_openai": return resolved - # Fall back to first configured local model for model in registry.get("models", []): if model.get("type") == "local_openai": resolved = _resolve_model(registry, model["id"]) @@ -300,15 +405,38 @@ def get_best_local_model(username: str, role: str = "chat") -> dict | None: return None -# ── Read API (for UI and callers) ───────────────────────────────────────────── +def get_google_api_key(username: str, account_id: str | None = None) -> str | None: + """ + Return the best available Gemini API key for the user. + + If account_id is specified, returns that account's key (or None if not found). + Otherwise returns the first configured account key, falling back to the + server-level GEMINI_API_KEY from .env. 
+ """ + registry = _load(username) + accounts = registry.get("providers", {}).get("google", {}).get("accounts", []) + + if account_id: + account = next((a for a in accounts if a["id"] == account_id), None) + return account.get("api_key") if account else None + + # First configured account + if accounts: + return accounts[0].get("api_key") or None + + # Fall back to .env server key + return settings.gemini_api_key or None + + +# ── Read API ────────────────────────────────────────────────────────────────── def get_registry(username: str) -> dict: - """Return the full registry (with built-in models injected for display).""" + """Return the full registry (providers + hosts + models + roles).""" return _load(username) def get_all_models(username: str) -> list[dict]: - """Return all user-defined models (resolved — hosts merged in).""" + """Return all user-defined models (resolved — credentials/hosts merged in).""" registry = _load(username) out = [] for m in registry.get("models", []): @@ -319,24 +447,94 @@ def get_all_models(username: str) -> list[dict]: def get_defined_roles(username: str) -> dict[str, dict]: - """Return the roles section of the registry, filling gaps with empty dicts.""" + """Return the roles section, filling gaps with empty dicts.""" registry = _load(username) roles = registry.get("roles", {}) - result = {} - for role in settings.get_defined_roles(): - result[role] = roles.get(role, {}) - return result + return {role: roles.get(role, {}) for role in settings.get_defined_roles()} -# ── Write API (CRUD) ────────────────────────────────────────────────────────── +def get_google_accounts(username: str) -> list[dict]: + """Return Google account entries (api_key masked for display).""" + registry = _load(username) + accounts = registry.get("providers", {}).get("google", {}).get("accounts", []) + return [ + { + "id": a["id"], + "label": a.get("label", ""), + "hint": (a.get("api_key") or "")[:8] + "…" if a.get("api_key") else "", + } + for a in accounts + ] 
+ + +def get_catalog(provider: str, username: str | None = None) -> list[dict]: + """ + Return the model catalog for a provider. + + For now returns server defaults. Phase 2 will merge in per-user additions. + """ + if provider == "anthropic": + return list(ANTHROPIC_CATALOG) + if provider == "google": + return list(GOOGLE_CATALOG) + return [] + + +# ── Write API — Google accounts ─────────────────────────────────────────────── + +def save_google_account(username: str, account_id: str | None, + label: str, api_key: str) -> str: + """Create or update a Google account entry. Returns the account ID.""" + data = _load(username) + accounts = data["providers"]["google"]["accounts"] + + if account_id: + for a in accounts: + if a["id"] == account_id: + a["label"] = label.strip() + if api_key.strip(): + a["api_key"] = api_key.strip() + _save(username, data) + return account_id + + account_id = secrets.token_hex(4) + accounts.append({ + "id": account_id, + "label": label.strip(), + "api_key": api_key.strip(), + }) + _save(username, data) + return account_id + + +def remove_google_account(username: str, account_id: str) -> bool: + """Remove a Google account. 
Clears any model entries that reference it.""" + data = _load(username) + accounts = data["providers"]["google"]["accounts"] + before = len(accounts) + data["providers"]["google"]["accounts"] = [a for a in accounts if a["id"] != account_id] + + # Clear role assignments for models that referenced this account + removed_model_ids = { + m["id"] for m in data.get("models", []) + if m.get("account_id") == account_id + } + data["models"] = [m for m in data.get("models", []) if m["id"] not in removed_model_ids] + for role_cfg in data.get("roles", {}).values(): + for key in PRIORITY_KEYS: + if role_cfg.get(key) in removed_model_ids: + role_cfg[key] = None + + _save(username, data) + return len(data["providers"]["google"]["accounts"]) < before + + +# ── Write API — Hosts ───────────────────────────────────────────────────────── def save_host(username: str, host_id: str | None, label: str, api_url: str, api_key: str, host_type: str = "openwebui") -> str: - """Create or update a host. Returns the host ID. - - host_type: "openwebui" (default) or "openai" (OpenRouter, LiteLLM, etc.) - """ + """Create or update a host. Returns the host ID.""" data = _load(username) host_type = host_type if host_type in ("openwebui", "openai") else "openwebui" @@ -350,7 +548,7 @@ def save_host(username: str, host_id: str | None, h["api_key"] = api_key.strip() _save(username, data) return host_id - host_id = None # not found — create new + host_id = None host_id = secrets.token_hex(4) data["hosts"].append({ @@ -365,25 +563,26 @@ def save_host(username: str, host_id: str | None, def remove_host(username: str, host_id: str) -> bool: - """Remove a host and all models that reference it. 
Returns True if found.""" + """Remove a host and all models that reference it.""" data = _load(username) before = len(data["hosts"]) - data["hosts"] = [h for h in data["hosts"] if h["id"] != host_id] - data["models"] = [m for m in data["models"] if m.get("host_id") != host_id] - # Clear any role assignments that pointed to removed models - removed_ids = {m["id"] for m in data["models"] if m.get("host_id") == host_id} + removed_model_ids = {m["id"] for m in data["models"] if m.get("host_id") == host_id} + data["hosts"] = [h for h in data["hosts"] if h["id"] != host_id] + data["models"] = [m for m in data["models"] if m.get("host_id") != host_id] for role_cfg in data.get("roles", {}).values(): for key in PRIORITY_KEYS: - if role_cfg.get(key) in removed_ids: + if role_cfg.get(key) in removed_model_ids: role_cfg[key] = None _save(username, data) return len(data["hosts"]) < before +# ── Write API — Models ──────────────────────────────────────────────────────── + def save_model(username: str, model_id: str | None, host_id: str, label: str, model_name: str, context_k: int = 0, tags: list[str] | None = None) -> str: - """Create or update a model entry. Returns the model ID.""" + """Create or update a local_openai model entry. 
Returns the model ID.""" data = _load(username) tags = tags or [] @@ -405,6 +604,7 @@ def save_model(username: str, model_id: str | None, host_id: str, "type": "local_openai", "label": label.strip() or model_name.strip(), "model_name": model_name.strip(), + "provider": "local", "host_id": host_id, "context_k": context_k, "tags": tags, @@ -418,12 +618,10 @@ def remove_model(username: str, model_id: str) -> bool: data = _load(username) before = len(data["models"]) data["models"] = [m for m in data["models"] if m["id"] != model_id] - for role_cfg in data.get("roles", {}).values(): for key in PRIORITY_KEYS: if role_cfg.get(key) == model_id: role_cfg[key] = None - _save(username, data) return len(data["models"]) < before @@ -434,8 +632,7 @@ def set_role(username: str, role: str, priority: str, model_id: str | None) -> b priority must be one of: primary, backup_1, backup_2, backup_3, backup_4 model_id None clears the slot. - model_id "claude_cli" / "gemini_cli" / "gemini_api" are valid built-in IDs. - Returns False if model_id is set but not found. + Built-in IDs (claude_cli, gemini_cli, gemini_api) are always valid. 
""" if priority not in PRIORITY_KEYS: return False @@ -455,10 +652,14 @@ def set_role(username: str, role: str, priority: str, model_id: str | None) -> b return True -def fetch_models_from_host(api_url: str, api_key: str) -> list[str]: +# ── Utility ─────────────────────────────────────────────────────────────────── + +def fetch_models_from_host(api_url: str, api_key: str, + host_type: str = "openwebui") -> list[str]: """Synchronously fetch the model list from an OpenAI-compatible host.""" import httpx - url = api_url.rstrip("/") + "/api/models" + path = "/api/models" if host_type == "openwebui" else "/models" + url = api_url.rstrip("/") + path headers = {"Authorization": f"Bearer {api_key}"} if api_key else {} resp = httpx.get(url, headers=headers, timeout=10) resp.raise_for_status() diff --git a/cortex/routers/orchestrator.py b/cortex/routers/orchestrator.py index 82017c5..08961d0 100644 --- a/cortex/routers/orchestrator.py +++ b/cortex/routers/orchestrator.py @@ -171,12 +171,18 @@ async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None: respond_with_final=req.respond_with_claude, ) else: + # Use the API key embedded in the resolved model config (V2 registry with + # account_id), then fall back to the per-user key from auth.json, then .env. + gemini_key = ( + (orch_model.get("api_key") if orch_model else None) + or get_user_gemini_key(user) + ) result = await orchestrator_engine.run( task=req.task, system_prompt=system_prompt, session_messages=session_messages, respond_with_claude=req.respond_with_claude, - gemini_api_key=get_user_gemini_key(user), + gemini_api_key=gemini_key, ) # Save the turn to the session store so it survives a page refresh diff --git a/documentation/ARCH__BACKENDS.md b/documentation/ARCH__BACKENDS.md index e8e3b54..f3f2a0e 100644 --- a/documentation/ARCH__BACKENDS.md +++ b/documentation/ARCH__BACKENDS.md @@ -1,18 +1,20 @@ # Architecture: LLM Backends > How Cortex selects and talks to AI models. 
-> Last updated: 2026-04-06 +> Last updated: 2026-04-27 (V2 schema) --- -## Backends +## Providers -| Backend | Type | Auth | Notes | -|---|---|---|---| -| **Claude CLI** | `claude_cli` | OAuth token from `~/.claude/.credentials.json` | Primary chat; model set via `DEFAULT_MODEL` in `.env` | -| **Gemini CLI** | `gemini_cli` | Gemini CLI credentials | Fallback / explicit selection | -| **Gemini API** | `gemini_api` | `GEMINI_API_KEY` in `.env` | Orchestrator tool loop only — not general chat | -| **Local (OpenAI-compat)** | `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, etc. | +Cortex supports four model types, each dispatched differently: + +| Type | Auth | Use | +|---|---|---| +| `claude_cli` | OAuth token from `~/.claude/.credentials.json` | Chat, persona responses | +| `gemini_cli` | Gemini CLI credentials | Chat fallback / explicit selection | +| `gemini_api` | API key from registry account or `.env` | Orchestrator tool loop | +| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, etc. | --- @@ -26,93 +28,129 @@ request's **role** in the user's model registry. Roles: `chat`, `orchestrator`, Resolution order for a role: 1. User registry: `roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4` -2. `.env` role default: `ROLE_CHAT=claude_cli`, `ROLE_DISTILL=gemini_api`, etc. +2. `.env` role default: `ROLE_CHAT=claude_cli`, `ROLE_DISTILL=claude_cli`, etc. 3. 
Hardcoded last-resort: `chat/distill/coder → claude_cli`, `orchestrator/research → gemini_api` ### Explicit Override The UI backend toggle cycles: **auto → claude → gemini → local → auto** -- **auto** (default): role-based routing as above; sends `model: null` to `/chat` -- **claude / gemini / local**: bypasses role routing; forces that specific backend -- When "local" is active, the configured model name appears below the toggle button +- **auto** (default): role-based routing as above +- **claude / gemini / local**: bypasses role routing; forces that backend type +- The toggle will be redesigned in Phase 3 to cycle through chat role slots (Primary / Backup 1 / Backup 2) -**Fallback chain** (automatic, on any error): +**Fallback chain** (automatic, only when no explicit registry entry exists): ``` claude → gemini gemini → claude local → claude ``` +When a model is explicitly configured in the registry, errors surface immediately — no silent fallback. -Each response includes a model label (bottom-right of the message bubble) showing what -actually responded. Amber label with `⚡` = fallback was used. - -Auth expiry on Claude triggers a UI banner + `claude_auth_expired` SSE event. +Each response shows a model tag (bottom-right of the message bubble) with the model label and host. --- -## Model Registry +## Model Registry — V2 Schema Per-user configuration stored in `home/{user}/model_registry.json`. -Hosts and models are managed at **Settings → Model Registry** (`/settings/local`). - -### Schema +Managed at **Settings → Model Registry** (`/settings/local`). Full provider UI coming in Phase 2. 
```json { - "version": 1, + "version": 2, + + "providers": { + "anthropic": { + "credentials": [ + {"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"} + ] + }, + "google": { + "accounts": [ + {"id": "a1b2", "label": "One Sky IT", "api_key": "AIza..."} + ] + } + }, + "hosts": [ { "id": "abc123", - "label": "Home ML Laptop", + "label": "Gaming Laptop", "api_url": "http://192.168.x.x:3000", - "api_key": "sk-...", + "api_key": "", "host_type": "openwebui" } ], + "models": [ { - "id": "def456", + "id": "m1", + "type": "claude_cli", + "label": "Sonnet 4.6 (CLI)", + "model_name": "claude-sonnet-4-6", + "provider": "anthropic", + "credential_id": "cli", + "context_k": 200, + "tags": ["chat", "persona"] + }, + { + "id": "m2", + "type": "gemini_api", + "label": "Gemini 2.5 Flash (OSIT)", + "model_name": "gemini-2.5-flash", + "provider": "google", + "account_id": "a1b2", + "context_k": 1000, + "tags": ["orchestrator", "research"] + }, + { + "id": "m3", "type": "local_openai", - "label": "Gemma Medium", - "model_name": "agent-support-gemma-medium", + "label": "Gemma 4 E4B", + "model_name": "gemma4:e4b", + "provider": "local", "host_id": "abc123", - "context_k": 50, - "tags": ["chat", "fast"] + "context_k": 72, + "tags": ["fast", "local"] } ], + "roles": { - "chat": { - "primary": "def456", - "backup_1": "claude_cli" - } + "chat": {"primary": "m1", "backup_1": "m2", "backup_2": "m3"}, + "orchestrator": {"primary": "m2", "backup_1": "m3"}, + "distill": {"primary": "m1"} } } ``` -### host_type - -Controls which API path layout is used: +### host_type (local hosts) | `host_type` | Chat endpoint | Models endpoint | Use for | |---|---|---|---| | `openwebui` (default) | `POST {url}/api/chat/completions` | `GET {url}/api/models` | Open WebUI, Ollama | | `openai` | `POST {url}/chat/completions` | `GET {url}/models` | OpenRouter, LiteLLM, Anthropic-compat | -Set `api_url` to the base path ending just before `/chat/completions`: +Set `api_url` to the base path before 
`/chat/completions`: - OpenRouter: `https://openrouter.ai/api/v1` -- LiteLLM proxy: `http://host:port` ### Built-in model IDs -Always resolvable without a registry entry: +Always resolvable without a user-created registry entry. Used as role defaults. -| ID | Backend | -|---|---| -| `claude_cli` | Claude CLI subprocess | -| `gemini_cli` | Gemini CLI subprocess | -| `gemini_api` | Gemini API (SDK) — orchestrator only | +| ID | Type | Notes | +|---|---|---| +| `claude_cli` | `claude_cli` | Model from `DEFAULT_MODEL` in `.env` | +| `gemini_cli` | `gemini_cli` | Gemini CLI subprocess | +| `gemini_api` | `gemini_api` | Model from `ORCHESTRATOR_MODEL` in `.env`; key from `GEMINI_API_KEY` | + +### V1 → V2 migration + +Automatic on first load. Changes: +- Adds `providers` section (Anthropic CLI credential + empty Google accounts) +- Migrates `gemini_api_key` from `auth.json` → `providers.google.accounts[0]` +- All existing hosts, models, and role assignments are preserved --- @@ -122,9 +160,9 @@ Runs `claude --print --no-session-persistence --output-format text` as a subproc - System prompt passed via `--system-prompt` - Conversation history formatted as `` block -- Token read live from `~/.claude/.credentials.json` on every call — never relies on the +- Token read live from `~/.claude/.credentials.json` on every call — never uses the env var, which goes stale after `claude auth login` -- Model override via `--model` flag when a specific `model_name` is configured in the registry +- Model override via `--model` flag when `model_name` is set in the registry entry Timeout: `TIMEOUT_CLAUDE=60` seconds (`.env`) @@ -136,7 +174,7 @@ Runs `gemini --output-format text --extensions "" -p ` as a subprocess. 
- `--extensions ""` disables all MCP extensions — prevents child processes keeping pipes open - `start_new_session=True` puts the process in its own group for clean `os.killpg` on timeout -- Output is cleaned to strip CLI noise lines (loading messages, retry notices, quota warnings) +- Output is cleaned to strip CLI noise (loading messages, retry notices, quota warnings) Timeout: `TIMEOUT_GEMINI=120` seconds (`.env`) @@ -155,13 +193,30 @@ Timeout: `TIMEOUT_LOCAL=300` seconds (`.env`) — local models may need to load --- +## Gemini API (Orchestrator) + +Used by `orchestrator_engine.py` for the ReAct tool loop. Not used for general chat. + +API key resolution order: +1. `api_key` embedded in the resolved orchestrator model config (V2 registry with `account_id`) +2. `get_user_gemini_key(user)` — reads from `auth.json` (legacy, kept for compat) +3. `GEMINI_API_KEY` in `.env` (server default) + +--- + ## Distillation -Memory distillation uses `role="distill"` for mid and long passes. Configure the distill -model via the Model Registry → Role Assignments → Distill role. +Memory distillation uses `role="distill"`. Configure via Model Registry → Role Assignments. -`.env` override: `ROLE_DISTILL=claude_cli` (default). Set to any built-in ID or leave blank -to fall through to the hardcoded default (`claude_cli`). +`.env` override: `ROLE_DISTILL=claude_cli` (default). + +--- + +## Future: Phase 3 — Backend Toggle Redesign + +The `claude → gemini → local` toggle will be replaced with a slot toggle that cycles +through the chat role's configured models (Primary → Backup 1 → Backup 2), showing +the actual model label. See `DESIGN__Model_Registry_V2.md`. --- @@ -170,7 +225,8 @@ to fall through to the hardcoded default (`claude_cli`). 
| File | Responsibility | |---|---| | `cortex/llm_client.py` | `complete()` — routing, dispatch, fallback | -| `cortex/model_registry.py` | Per-user registry CRUD and resolution | +| `cortex/model_registry.py` | Per-user registry CRUD and resolution (V2) | | `cortex/routers/local_llm.py` | Settings UI routes + `/api/models/role` AJAX | | `cortex/routers/chat.py` | `_backend_label()`, `fallback_used` flag | +| `cortex/routers/orchestrator.py` | Engine selection, Gemini API key resolution | | `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `PRIMARY_BACKEND` | diff --git a/documentation/DESIGN__Model_Registry_V2.md b/documentation/DESIGN__Model_Registry_V2.md new file mode 100644 index 0000000..088d469 --- /dev/null +++ b/documentation/DESIGN__Model_Registry_V2.md @@ -0,0 +1,310 @@ +# Model Registry V2 — Design Document + +> Status: Planning / Pre-implementation +> Goal: Unified, provider-agnostic model management with clean role-based routing + +--- + +## Problem Statement + +The current system has two classes of models with different treatment: + +| Type | How configured | How selected | +|---|---|---| +| Claude, Gemini | Hardcoded built-ins (`claude_cli`, `gemini_api`) | Backend toggle string ("claude"/"gemini") | +| Local (Ollama, Open WebUI) | Configured via `/settings/local` | Backend toggle string "local" | + +This breaks down when you want: +- Multiple Gemini API keys (e.g. 
one per Google account) +- Claude via direct API key instead of OAuth CLI +- OpenRouter or other hosted providers alongside local models +- Role assignments to span all provider types uniformly +- A chat toggle that shows "which model" not "which service" + +--- + +## Proposed Architecture + +### Core concept: Providers + Credentials + Models + Roles + +``` +Providers (built-in, fixed set) + └─ Anthropic ← has a catalog of Claude model IDs + └─ Google ← has a catalog of Gemini model IDs + └─ Local Host ← OpenAI-compatible endpoint (user adds these) + +Credentials (user-configured, per provider) + └─ Anthropic ← Claude CLI (OAuth, default) or API key + └─ Google ← one or more API keys (one per Google account) + └─ Local Host ← api_key stored on the host record (existing) + +Model Entries (user-registered — "I want to use this model") + └─ Provider + model ID + credential = one usable model entry + └─ Same model ID with two different accounts = two model entries + +Role Assignments (unified — any model entry can fill any role) + └─ chat: primary → backup_1 → backup_2 + └─ orchestrator: primary → backup_1 + └─ distill: primary + └─ (etc.) +``` + +### Backend toggle redesign + +**Current:** cycles service type strings — `auto → claude → gemini → local` +**New:** cycles through the chat role's assigned models — `Primary → Backup 1 → Backup 2` + +The toggle displays the active model's label (e.g. "Sonnet 4.6" / "Gemini 2.5 Flash" / "Gemma 4 E4B"). Auto defaults to Primary. + +This means the toggle is context-free — it just picks a slot — and all the "what model, what provider, what credentials" logic lives in the registry. + +--- + +## Data Model (V2 Schema) + +Stored in `home/{user}/model_registry.json`. 
+ +```json +{ + "version": 2, + + "providers": { + "anthropic": { + "catalog": [ + {"id": "claude-opus-4-7", "label": "Claude Opus 4.7", "context_k": 200}, + {"id": "claude-sonnet-4-6", "label": "Claude Sonnet 4.6", "context_k": 200}, + {"id": "claude-haiku-4-5", "label": "Claude Haiku 4.5", "context_k": 200} + ], + "credentials": [ + {"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"} + ] + }, + "google": { + "catalog": [ + {"id": "gemini-2.5-pro", "label": "Gemini 2.5 Pro", "context_k": 1000}, + {"id": "gemini-2.5-flash", "label": "Gemini 2.5 Flash", "context_k": 1000}, + {"id": "gemini-2.0-flash", "label": "Gemini 2.0 Flash", "context_k": 1000}, + {"id": "gemini-1.5-pro", "label": "Gemini 1.5 Pro", "context_k": 2000} + ], + "accounts": [ + {"id": "osit", "label": "One Sky IT (scott.idem@oneskyit.com)", "api_key": "AIza..."} + ] + } + }, + + "hosts": [ + { + "id": "h1", + "label": "Gaming Laptop", + "api_url": "http://192.168.x.x:3000", + "api_key": "", + "host_type": "openwebui" + } + ], + + "models": [ + { + "id": "m1", + "label": "Sonnet 4.6 (CLI)", + "type": "claude_cli", + "provider": "anthropic", + "model_name": "claude-sonnet-4-6", + "credential_id": "cli", + "context_k": 200, + "tags": ["chat", "persona"] + }, + { + "id": "m2", + "label": "Gemini 2.5 Flash (OSIT)", + "type": "gemini_api", + "provider": "google", + "model_name": "gemini-2.5-flash", + "account_id": "osit", + "context_k": 1000, + "tags": ["orchestrator", "research"] + }, + { + "id": "m3", + "label": "Gemma 4 E4B", + "type": "local_openai", + "provider": "local", + "host_id": "h1", + "model_name": "gemma4:e4b", + "context_k": 72, + "tags": ["fast", "local"] + } + ], + + "roles": { + "chat": {"primary": "m1", "backup_1": "m2", "backup_2": "m3"}, + "orchestrator":{"primary": "m2", "backup_1": "m3"}, + "distill": {"primary": "m1"} + } +} +``` + +### Key differences from V1 + +| V1 | V2 | +|---|---| +| Built-ins (`claude_cli`, `gemini_api`) are hardcoded constants | All models are 
registry entries — built-ins become auto-populated defaults | +| Single Gemini API key in `auth.json` | `providers.google.accounts[]` — list of accounts | +| Role assignments only work with local models in UI | All models in all roles | +| Host list only for local | Host list stays for local; `providers` section for cloud | +| `type` field existed but only `local_openai` was user-configurable | `type` fully determines dispatch for all models | + +--- + +## Resolution Logic (updated) + +`get_model_for_role(username, role)` stays the same interface. Internally: + +1. Walk `roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4` +2. For each slot: resolve the model entry → merge in credentials +3. If no registry entry for a role: fall back to `.env` defaults, then hardcoded + +`_resolve_model(registry, model_id)` gains new merge cases: +- `type == "claude_cli"` → merge in credential from `providers.anthropic.credentials` +- `type == "gemini_api"` → merge in `api_key` from `providers.google.accounts[account_id]` +- `type == "local_openai"` → merge host fields (existing logic, unchanged) + +### Backend toggle → dispatch + +``` +UI sends: slot = "primary" | "backup_1" | "backup_2" | null (auto) +``` + +`llm_client.complete()` resolves the slot against the `chat` role, gets a full model config, dispatches by `type`. No more `"claude"/"gemini"/"local"` string matching. + +--- + +## Routing Code Changes + +### `llm_client.complete()` +- **Remove:** `model: str | None` → service type string +- **Add:** `slot: str | None = None` → role slot override ("primary"/"backup_1"/etc.) 
+- Dispatch table: `type` → handler + - `claude_cli` → `_claude()` (unchanged) + - `claude_api` → `_claude_api()` (new, direct Anthropic API — future phase) + - `gemini_cli` → `_gemini()` (unchanged) + - `gemini_api` → `_gemini_api()` (new, replaces current hardcoded gemini_api built-in) + - `local_openai` → `_local()` (unchanged) + +### `orchestrator_engine.py` / `openai_orchestrator.py` +- Get orchestrator model via `get_model_for_role(username, "orchestrator")` +- Already works — `openai_orchestrator.py` runs when type is `local_openai` +- `orchestrator_engine.py` (Gemini) runs when type is `gemini_api` + +### Chat router (`routers/chat.py`) +- Accept `slot` instead of `model` from UI +- Pass to `llm_client.complete(slot=slot)` + +--- + +## Settings UI Redesign + +### New page structure + +``` +/settings/models ← unified model registry (replaces /settings/local) + Section 1: Cloud Providers + Anthropic + - credential: Claude CLI (OAuth) [default, always there] + - + Add API Key (future) + - model catalog [editable list of available Claude models] + Google + - accounts: [osit key ●●●●, + Add account] + - model catalog [editable list of available Gemini models] + Section 2: Local Hosts + [existing host cards, unchanged] + Section 3: Models + [unified list — all registered model entries across all providers] + + Add Model (provider picker first, then model + credential/account dropdowns) + +/settings/roles ← standalone page (or promoted to /settings/models bottom) + Role Assignments + chat: [primary ▾] [backup 1 ▾] [backup 2 ▾] + orchestrator: [primary ▾] [backup 1 ▾] + distill: [primary ▾] + (all dropdowns show all models from all providers) +``` + +### Backend toggle in chat UI + +Replace the `auto → claude → gemini → local` cycle with: + +``` +[Model label] ▾ (click to cycle through chat role slots) +``` + +- Shows the label of the currently active chat model +- Click cycles: Primary → Backup 1 → Backup 2 → Primary +- Slots with no model assigned are skipped
+- Color: same purple/amber/slate theme, based on provider type (optional) + +--- + +## Migration + +V1 → V2 is handled in `_load()`: + +1. Detect `version == 1` (or missing) +2. Synthesize `providers.anthropic` catalog from hardcoded defaults +3. Synthesize `providers.google` — migrate API key from `auth.json` as first account +4. Convert built-in role assignments (`claude_cli` / `gemini_api`) to new model entry IDs +5. Existing `hosts[]` and `local_openai` models carry over unchanged +6. Write `version: 2` and save + +No data loss. Old `local_llm.json` migration path still works (V0 → V1 → V2). + +--- + +## Phases + +### Phase 1 — Data model + backend routing (no UI changes yet) +- Extend schema to V2 in `model_registry.py` +- Migration from V1 on first load +- Update `_resolve_model()` to handle `gemini_api` + account lookup +- Update `llm_client.complete()` to accept `slot` parameter +- Update `routers/chat.py` to pass `slot` instead of backend string +- Keep backend toggle UI working (map old strings to slots temporarily) +- **Deliverable:** routing works with multi-account Gemini, no UI changes needed yet + +### Phase 2 — Cloud provider UI +- Add Anthropic and Google sections to `/settings/local` (rename to `/settings/models`) +- Google accounts: add/remove API keys with labels +- Editable model catalog for Anthropic + Google (add/remove model IDs from the list) +- Model entry creation: provider picker → model dropdown (from catalog) → account/credential picker +- **Deliverable:** can register cloud models in the UI just like local models + +### Phase 3 — Unified role assignments + toggle redesign +- Promote role assignments to standalone `/settings/roles` page (or `/settings/models` bottom) +- All models from all providers appear in role selects +- Chat UI toggle: replace service-type cycle with slot cycle, show model label +- **Deliverable:** end-to-end unified experience + +### Phase 4 — Polish + future providers +- Claude direct API key support (optional, 
CLI is fine for now) +- OpenRouter as a named provider (already works as a "local" host with host_type=openai — could be promoted) +- Model catalog sync: fetch available models from Anthropic/Google API if keys are present +- Per-role "test" button in role assignments UI + +--- + +## Open Questions + +1. **Claude direct API key:** Is this needed now, or is CLI OAuth sufficient for all users? + - Decision: CLI-only for Phase 1; add API key support in Phase 4 if needed + +2. **Catalog management:** Should the Anthropic/Google catalogs be server-wide defaults + that users can extend, or fully per-user? + - Recommendation: ship sensible defaults in code (updated with each deploy); + users can add custom entries if needed + +3. **Toggle UX:** Cycle through slot labels ("Primary / Backup 1 / Backup 2") or cycle + through model labels ("Sonnet 4.6 / Gemini 2.5 Flash / Gemma 4")? + - Recommendation: cycle through model labels — they make it clearer what you're switching to + +4. **Orchestrator mode toggle:** Does agent mode also respect the slot toggle, or is it + always "use orchestrator role"? + - Recommendation: keep the orchestrator role separate; the UI toggle only affects the `chat` role diff --git a/documentation/MASTER.md b/documentation/MASTER.md index 335d28f..1e837eb 100644 --- a/documentation/MASTER.md +++ b/documentation/MASTER.md @@ -1,7 +1,7 @@ # Cortex / Inara — Master Index > Start here. This document is a map, not a manual. -> Last updated: 2026-04-03 +> Last updated: 2026-04-27 --- @@ -24,6 +24,7 @@ Cortex is a self-hosted personal AI platform.
It routes messages from any input | Gemini backend | ✅ Live | Fallback — via Gemini CLI | | Local backend | ✅ Live | Third option — Open WebUI/Ollama on scott_gaming | | Gemini orchestrator | ✅ Live | Tool loop → Claude response, Agent mode in UI | +| Model registry V2 | ✅ Live | Providers (Anthropic/Google/Local), multi-account Gemini | | Memory distillation | ✅ Live | Short (daily) / Mid (weekly) / Long (monthly) | | Multi-user | ✅ Live | Scott, Holly, Brian — each with own personas | | Session search | ✅ Live | Full-text search across past session logs | diff --git a/documentation/TODO__Agents.md b/documentation/TODO__Agents.md index e994146..2af2b31 100644 --- a/documentation/TODO__Agents.md +++ b/documentation/TODO__Agents.md @@ -27,6 +27,13 @@ Gemini API orchestrator for private/offline tasks. ## 🟡 Medium Priority +### [Models] Model Registry V2 — Unified Provider System +See `DESIGN__Model_Registry_V2.md` for full design. +- [x] **Phase 1** — V2 schema with providers (Anthropic/Google), multi-account Gemini, auto migration, orchestrator uses account API key — 2026-04-27 +- [ ] **Phase 2** — Cloud provider UI: Anthropic + Google sections in `/settings/models`, account management, model entry creation for cloud models +- [ ] **Phase 3** — Unified roles + toggle redesign: standalone role assignments, chat toggle cycles role slots (Primary/Backup 1/Backup 2) showing model label +- [ ] **Phase 4** — Polish: Claude API key, OpenRouter as named provider, catalog sync from API + ### [Intelligence] Knowledge consolidation — Phase 1 See `ARCH__Intelligence_Layer.md` for full design. - [x] Tool: `ae_journal_search` — search before creating to avoid duplicates