diff --git a/cortex/config.py b/cortex/config.py index 4b591a3..2678117 100644 --- a/cortex/config.py +++ b/cortex/config.py @@ -3,7 +3,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict class Settings(BaseSettings): - anthropic_api_key: str | None = None # not used — claude CLI handles auth + anthropic_api_key: str | None = None # not used — configure via model registry # Google OAuth — "Sign in with Google" for all users # Create credentials at console.cloud.google.com → APIs & Services → Credentials @@ -38,7 +38,6 @@ class Settings(BaseSettings): default_model: str = "claude-sonnet-4-6" default_tier: int = 2 max_history_messages: int = 40 # rolling window — 20 turns (user + assistant) - primary_backend: str = "claude" # "claude" | "local" — gemini CLI removed June 2026 # Local model backend — OpenAI-compatible API (Open WebUI / Ollama) # Set LOCAL_API_URL in .env to enable; leave blank to disable @@ -46,9 +45,6 @@ class Settings(BaseSettings): local_api_key: str = "" # sk-... from Open WebUI → Settings → Account → API Keys local_model: str = "" # workspace or model name, e.g. test-agent-simple - # Per-backend timeouts in seconds - timeout_claude: int = 60 - timeout_gemini: int = 120 # frequently slow under load timeout_local: int = 300 # local models may need to load first # Auto-distillation schedule — override in .env @@ -66,14 +62,13 @@ class Settings(BaseSettings): distill_backend_long: str = "" # Model registry: default backend type per role when user registry has no entry. - # Values: "claude_cli" | "gemini_cli" | "gemini_api" (builtin IDs) - # Override in .env: ROLE_CHAT=claude_cli ROLE_DISTILL=gemini_api etc. - role_chat: str = "claude_cli" - role_orchestrator: str = "gemini_api" - role_distill: str = "claude_cli" - role_janitor: str = "claude_cli" # assign a cheap/fast model: Haiku 4.5, local Gemma E4B - role_coder: str = "claude_cli" - role_research: str = "gemini_api" + # All roles must be configured via /settings/models — no built-in fallback. + role_chat: str = "" + role_orchestrator: str = "" + role_distill: str = "" + role_janitor: str = "" + role_coder: str = "" + role_research: str = "" # Comma-separated list of standard roles shown in the model settings UI. # Add custom roles here to extend the UI without code changes. @@ -122,8 +117,8 @@ class Settings(BaseSettings): return [r.strip() for r in self.defined_roles.split(",") if r.strip()] def get_role_default(self, role: str) -> str: - """Return the .env default backend type for a role (e.g. 'claude_cli').""" - return getattr(self, f"role_{role.replace('-', '_')}", "claude_cli") + """Return the .env default backend type for a role, or '' if unconfigured.""" + return getattr(self, f"role_{role.replace('-', '_')}", "") def home_root(self) -> Path: """Resolve home_dir relative to this file's location if not absolute.""" diff --git a/cortex/llm_client.py b/cortex/llm_client.py index 1a0ddbc..98b30b6 100644 --- a/cortex/llm_client.py +++ b/cortex/llm_client.py @@ -1,50 +1,18 @@ import asyncio import logging -import os -import signal -import subprocess from config import settings -import event_bus logger = logging.getLogger(__name__) -# Track active Gemini process group IDs so we can kill them on shutdown -_active_pgroups: set[int] = set() - - -def _register_pgroup(pid: int) -> None: - _active_pgroups.add(pid) - - -def _unregister_pgroup(pid: int) -> None: - _active_pgroups.discard(pid) - - -async def cleanup() -> None: - """Kill any lingering Gemini process groups. Call from lifespan shutdown.""" - for pid in list(_active_pgroups): - try: - os.killpg(pid, signal.SIGKILL) - logger.info("Shutdown: killed Gemini process group %d", pid) - except ProcessLookupError: - pass - _active_pgroups.clear() - - -# Map from registry model type → dispatch function key _TYPE_TO_BACKEND = { - "claude_cli": "claude", - "gemini_cli": "gemini", # Gemini CLI is being replaced by Antigravity CLI (June 2026) - "gemini_api": "gemini", # routes to CLI subprocess — no users configured; kept for compat "local_openai": "local", "anthropic_api": "anthropic_api", } -# Explicit UI toggle values (kept for backward compat) -_EXPLICIT_BACKENDS = ("claude", "gemini", "local") -# Gemini CLI removed from the claude fallback — it's shutting down June 18 2026. -# claude failures now surface directly; gemini backend still falls back to claude. -_FALLBACK: dict[str, str | None] = {"claude": None, "gemini": "claude", "local": "claude", "anthropic_api": "claude"} +_FALLBACK: dict[str, str | None] = { + "local": None, + "anthropic_api": None, +} async def complete( @@ -55,16 +23,15 @@ async def complete( slot: str | None = None, max_tokens: int = 2048, attachment: dict | None = None, - token_sink=None, # async (str) -> None; if set, stream tokens as they arrive + token_sink=None, ) -> tuple[str, str]: """ Returns (response_text, actual_backend_used). - slot: Phase 3 — specific role slot ("primary" | "backup_1" | "backup_2"). - Resolves that exact slot, no fallback chain. Takes priority over model. - model: legacy backend override ("claude" | "gemini" | "local") from old toggle. - None = resolve via model registry for the given role. - role: registry role used for slot/auto routing (default: "chat"). + slot: explicit role slot ("primary" | "backup_1" | "backup_2"). + Resolves that exact slot, no fallback chain. Takes priority over role. + role: registry role used for auto routing (default: "chat"). + model: ignored — kept for API compatibility; routing is via slot/role only. """ import model_registry as _reg from persona import _user @@ -73,46 +40,33 @@ async def complete( resolved_cfg: dict | None = None if slot is not None: - # Phase 3: explicit slot selection — no fallback within the role resolved_cfg = _reg.get_model_for_slot(username, role, slot) if resolved_cfg: - primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "claude") + primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "local") else: - # Slot not configured — fall through to auto routing slot = None if slot is None: - if model in _EXPLICIT_BACKENDS: - # Legacy: explicit backend override from old UI toggle - if model == "local": - resolved_cfg = _reg.get_best_local_model(username, role) - if not resolved_cfg: - raise RuntimeError("No local model configured — add one at /settings/models") - primary = model + resolved = _reg.get_model_for_role(username, role) + if resolved: + resolved_cfg = resolved + primary = _TYPE_TO_BACKEND.get(resolved["type"], "local") else: - # Auto: role-based routing via model registry - resolved = _reg.get_model_for_role(username, role) - if resolved: - resolved_cfg = resolved - primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude") - else: - primary = settings.primary_backend + raise RuntimeError( + f"No model configured for role '{role}'. " + "Add one at /settings/models." + ) - fallback = _FALLBACK.get(primary, "claude") + fallback = _FALLBACK.get(primary) try: response = await _dispatch(primary, system_prompt, messages, resolved_cfg, attachment=attachment, token_sink=token_sink) return response, primary except Exception as e: - err_str = str(e) - if primary == "claude" and any(k in err_str for k in ("401", "authenticate", "expired", "OAuth")): - await event_bus.publish({"type": "claude_auth_expired"}) - # Surface errors when a model is explicitly configured or a specific slot was pinned. if resolved_cfg is not None: logger.error("%s failed (no fallback — model explicitly configured): %s", primary, e) raise - # No fallback defined for this backend — surface the error directly. if not fallback: logger.error("%s failed (no fallback configured): %s", primary, e) raise @@ -129,9 +83,7 @@ async def _dispatch( attachment: dict | None = None, token_sink=None, ) -> str: - if backend == "gemini": - text = await _gemini(system_prompt, messages) - elif backend == "local": + if backend == "local": if token_sink: return await _local_streaming(token_sink, system_prompt, messages, model_cfg) text = await _local(system_prompt, messages, model_cfg, attachment=attachment) @@ -140,55 +92,12 @@ async def _dispatch( return await _anthropic_api_streaming(token_sink, system_prompt, messages, model_cfg) text = await _anthropic_api(system_prompt, messages, model_cfg) else: - text = await _claude(system_prompt, messages, model_cfg) - # For non-streaming backends when token_sink is provided, emit the full text as one chunk. + raise RuntimeError(f"Unknown backend '{backend}' — check model type in registry") if token_sink and text: await token_sink(text) return text -def _fresh_claude_token() -> str | None: - """Read the current OAuth access token from the Claude credentials file. - - The token in the systemd .env goes stale (it rotates on each login). - Reading directly from ~/.claude/.credentials.json always gets the latest. - """ - import json as _json - creds_path = os.path.expanduser("~/.claude/.credentials.json") - try: - with open(creds_path) as f: - data = _json.load(f) - return data["claudeAiOauth"]["accessToken"] - except Exception as e: - logger.debug("Could not read Claude credentials file: %s", e) - return None - - -async def _claude(system_prompt: str, messages: list[dict], model_cfg: dict | None) -> str: - model_name = (model_cfg or {}).get("model_name") if model_cfg else None - cmd = [ - "claude", "--print", - "--no-session-persistence", - "--output-format", "text", - ] - # Only pass --model if it's a real model name (not a backend type string) - if model_name and model_name not in ("claude", "gemini", "local", ""): - cmd.extend(["--model", model_name]) - if system_prompt: - cmd.extend(["--system-prompt", system_prompt]) - cmd.append(_build_conversation(messages)) - - # Always use the freshest token from the credentials file so the systemd - # service doesn't break when the env-var token rotates after a login. - env = os.environ.copy() - token = _fresh_claude_token() - if token: - env["CLAUDE_CODE_OAUTH_TOKEN"] = token - env.pop("ANTHROPIC_API_KEY", None) # never let a stale API key override OAuth - - return await _run(cmd, timeout=settings.timeout_claude, env=env) - - async def _local( system_prompt: str, messages: list[dict], @@ -413,106 +322,3 @@ async def _local_streaming( return full_text.strip() -async def _gemini(system_prompt: str, messages: list[dict]) -> str: - # Gemini CLI spawns MCP child processes that keep stdout pipes open after responding. - # start_new_session=True puts the whole tree in its own process group so - # os.killpg kills everything at once on timeout. - cmd = [ - "gemini", - "--output-format", "text", - "--extensions", "", # disable all extensions — prevents MCP child processes - "-p", _build_prompt(system_prompt, messages), - ] - - try: - proc = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - start_new_session=True, - ) - except FileNotFoundError: - raise RuntimeError("gemini not found in PATH") - - _register_pgroup(proc.pid) - timeout = settings.timeout_gemini - try: - stdout_bytes, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout) - raw = stdout_bytes.decode() - except asyncio.TimeoutError: - try: - os.killpg(proc.pid, signal.SIGKILL) - except ProcessLookupError: - pass - raise RuntimeError(f"Gemini timed out after {timeout}s") - except asyncio.CancelledError: - try: - os.killpg(proc.pid, signal.SIGKILL) - except ProcessLookupError: - pass - raise - finally: - _unregister_pgroup(proc.pid) - - clean = _clean_gemini_output(raw) - if not clean: - raise RuntimeError("Gemini returned an empty response") - return clean - - -# Lines Gemini CLI writes to stdout that are not part of the actual response -_GEMINI_NOISE = ( - "Loaded cached credentials", - "Loading extension:", - "Server '", - "Listening for", - "Model is overloaded", - "High demand", - "Retrying", - "retrying", - "429", - "quota", -) - - -def _clean_gemini_output(text: str) -> str: - lines = [ - line for line in text.splitlines() - if not any(line.strip().startswith(p) for p in _GEMINI_NOISE) - ] - return "\n".join(lines).strip() - - -async def _run(cmd: list[str], timeout: int = 60, env: dict | None = None) -> str: - loop = asyncio.get_running_loop() - result = await loop.run_in_executor( - None, - lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, env=env), - ) - if result.returncode != 0: - detail = result.stderr.strip() or result.stdout.strip() or f"exit code {result.returncode}" - raise RuntimeError(f"{cmd[0]} failed: {detail}") - return result.stdout.strip() - - -def _build_conversation(messages: list[dict]) -> str: - """Conversation only — used for Claude (system prompt passed separately).""" - parts = [] - prior = messages[:-1] - if prior: - history_lines = [] - for msg in prior: - label = settings.user_name if msg["role"] == "user" else settings.agent_name - history_lines.append(f"{label}: {msg['content']}") - parts.append("\n" + "\n\n".join(history_lines) + "\n") - parts.append(messages[-1]["content"] if messages else "") - return "\n\n".join(parts) - - -def _build_prompt(system_prompt: str, messages: list[dict]) -> str: - """Full prompt with system context embedded — used for Gemini.""" - parts = [] - if system_prompt: - parts.append(f"\n{system_prompt}\n") - parts.append(_build_conversation(messages)) - return "\n\n".join(parts) diff --git a/cortex/main.py b/cortex/main.py index a166023..05e3ea8 100644 --- a/cortex/main.py +++ b/cortex/main.py @@ -18,8 +18,6 @@ async def lifespan(app: FastAPI): scheduler.start() yield scheduler.stop() - from llm_client import cleanup - await cleanup() app = FastAPI(title="Cortex Dispatcher", lifespan=lifespan) diff --git a/cortex/routers/auth.py b/cortex/routers/auth.py index 3167edd..30d6907 100644 --- a/cortex/routers/auth.py +++ b/cortex/routers/auth.py @@ -1,76 +1,12 @@ """ -CLI auth status for both Claude and Gemini backends. - -GET /auth/status — returns per-backend auth info and warning flags - -Claude: warns when OAuth token is < WARN_HOURS from expiry (requires - user to re-run `claude` to refresh via browser flow). -Gemini: warns only when oauth_creds.json is missing or has no - refresh_token (access token rotates automatically every ~1h). +GET /auth/status — returns connectivity status for configured model backends. """ -import json import logging -from datetime import datetime, timezone -from pathlib import Path from fastapi import APIRouter -from config import settings logger = logging.getLogger(__name__) router = APIRouter(prefix="/auth") -CLAUDE_CREDS = Path.home() / ".claude" / ".credentials.json" -GEMINI_CREDS = Path.home() / ".gemini" / "oauth_creds.json" -GEMINI_ACCTS = Path.home() / ".gemini" / "google_accounts.json" -WARN_HOURS = 24 # no refresh token — warn a day ahead -WARN_HOURS_REFRESH = 1 # refresh token present — only warn if CLI hasn't rotated in time - - -def _claude_status() -> dict: - try: - data = json.loads(CLAUDE_CREDS.read_text()) - oauth = data["claudeAiOauth"] - has_refresh = bool(oauth.get("refreshToken")) - expires_dt = datetime.fromtimestamp(oauth["expiresAt"] / 1000, tz=timezone.utc) - now = datetime.now(tz=timezone.utc) - hours_remaining = (expires_dt - now).total_seconds() / 3600 - # When a refresh token is present the CLI *should* auto-rotate the access - # token, but sometimes it doesn't. Use a tight 1-hour window so a fresh - # 8-hour token doesn't immediately trigger a warning, but a stale token - # that the CLI missed will still surface before it expires. - expired = hours_remaining <= 0 - threshold = WARN_HOURS_REFRESH if has_refresh else WARN_HOURS - warning = expired or hours_remaining < threshold - return { - "ok": True, - "has_refresh_token": has_refresh, - "access_token_expires_at": expires_dt.isoformat(), - "access_token_hours_remaining": round(hours_remaining, 1), - "warning": warning, - "expired": expired, - } - except Exception as e: - logger.warning("claude auth check failed: %s", e) - return {"ok": False, "error": str(e), "warning": True, "expired": False} - - -def _gemini_status() -> dict: - try: - creds = json.loads(GEMINI_CREDS.read_text()) - if not creds.get("refresh_token"): - return {"ok": True, "authenticated": False, "warning": True, "account": None} - account = None - try: - accts = json.loads(GEMINI_ACCTS.read_text()) - account = accts.get("active") - except Exception: - pass - return {"ok": True, "authenticated": True, "warning": False, "account": account} - except FileNotFoundError: - return {"ok": True, "authenticated": False, "warning": True, "account": None} - except Exception as e: - logger.warning("gemini auth check failed: %s", e) - return {"ok": False, "error": str(e), "warning": True, "authenticated": False} - async def _local_status(username: str = "scott") -> dict: """Check reachability of the user's configured local model host.""" @@ -104,7 +40,5 @@ async def _local_status(username: str = "scott") -> dict: @router.get("/status") async def auth_status() -> dict: return { - "claude": _claude_status(), - "gemini": _gemini_status(), "local": await _local_status(), } diff --git a/cortex/routers/chat.py b/cortex/routers/chat.py index a0af5bd..009dbed 100644 --- a/cortex/routers/chat.py +++ b/cortex/routers/chat.py @@ -21,11 +21,7 @@ router = APIRouter() def _backend_label(backend: str, username: str, role: str = "chat") -> str: - """Human-readable label for the model that handled a request (legacy path).""" - if backend == "claude": - return "Claude" - if backend == "gemini": - return "Gemini" + """Human-readable label for the model that handled a request.""" if backend == "local": cfg = model_registry.get_best_local_model(username, role) if cfg: @@ -52,7 +48,7 @@ class ChatRequest(BaseModel): message: str session_id: str | None = None tier: int | None = None - model: str | None = None # legacy backend override ("claude"|"gemini"|"local") + model: str | None = None # ignored — kept for API compatibility slot: str | None = None # Phase 3: explicit slot ("primary"|"backup_1"|"backup_2") chat_role: str = "chat" # active role: "chat"|"coder"|"research"|"distill" etc. include_long: bool = True @@ -64,10 +60,6 @@ class ChatRequest(BaseModel): attachment: Attachment | None = None # image attachment (text files injected client-side) -class BackendRequest(BaseModel): - primary: str # "claude", "gemini", or "local" - - class NoteRequest(BaseModel): session_id: str note: str @@ -183,9 +175,6 @@ async def _stream_chat(req: ChatRequest): yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n" finally: - # Ensure the LLM task is cancelled if the generator is torn down - # (e.g. client disconnect or server shutdown). This propagates - # CancelledError into _gemini() which kills the process group. if not task.done(): task.cancel() try: @@ -203,10 +192,6 @@ async def chat(req: ChatRequest) -> StreamingResponse: ) -_BACKEND_CYCLE = ("claude", "gemini", "local") -_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"} - - def _request_user(request: Request) -> str | None: """Extract username from JWT cookie, or None.""" try: @@ -216,20 +201,6 @@ def _request_user(request: Request) -> str | None: return None -def _local_model_info(request: Request) -> dict | None: - """Return the best local model {label, model_name} for the session user, or None.""" - username = _request_user(request) - if not username: - return None - try: - cfg = model_registry.get_best_local_model(username, "chat") - if cfg: - return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")} - except Exception: - pass - return None - - def _chat_slot_models(username: str) -> list[dict]: """Return [{slot, label, type}] for each configured slot in the chat role, primary first.""" registry = model_registry.get_registry(username) @@ -279,7 +250,6 @@ async def get_backend(request: Request) -> dict: username = _request_user(request) chat_models = _chat_slot_models(username) if username else [] available_roles = _available_roles_for_toggle(username) if username else [] - p = settings.primary_backend orch_label = None if username: @@ -288,25 +258,9 @@ async def get_backend(request: Request) -> dict: orch_label = orch_cfg.get("label") or orch_cfg.get("model_name") or None return { - "chat_models": chat_models, # Phase 3: [{slot, label, type}] for chat-role slots - "available_roles": available_roles, # kept for banner + backward compat + "chat_models": chat_models, + "available_roles": available_roles, "orchestrator_model": orch_label, - # Legacy fields kept for backward compat - "primary": p, - "fallback": _BACKEND_FALLBACK.get(p, "claude"), - "local_model": _local_model_info(request), - } - - -@router.post("/backend") -async def set_backend(req: BackendRequest, request: Request) -> dict: - if req.primary not in _BACKEND_CYCLE: - raise HTTPException(status_code=400, detail="primary must be 'claude', 'gemini', or 'local'") - settings.primary_backend = req.primary - return { - "primary": req.primary, - "fallback": _BACKEND_FALLBACK[req.primary], - "local_model": _local_model_info(request), } diff --git a/cortex/routers/local_llm.py b/cortex/routers/local_llm.py index 0669fda..a9fd599 100644 --- a/cortex/routers/local_llm.py +++ b/cortex/routers/local_llm.py @@ -744,6 +744,53 @@ async def remove_custom_role_route( return RedirectResponse("/settings/models#roles", status_code=303) +@router.post("/api/models/{model_id}/edit") +async def edit_model_ajax( + request: Request, + model_id: str, + mtype: str = Form(""), + label: str = Form(""), + model_name: str = Form(""), + context_k: int = Form(0), + max_rounds: int = Form(0), + tools: int = Form(1), + tags: str = Form(""), + reasoning_budget_tokens: int = Form(0), + host_id: str = Form(""), + account_id: str = Form(""), + credential_id: str = Form("cli"), +) -> JSONResponse: + """AJAX: edit a model entry. Returns JSON {ok, label, model_name} on success.""" + username = _get_user(request) + if not username: + return JSONResponse({"error": "Not authenticated"}, status_code=401) + if not model_name.strip(): + return JSONResponse({"error": "Model name is required."}, status_code=400) + tag_list = [t.strip() for t in tags.split(",") if t.strip()] + max_rounds_ = max_rounds or None + tools_bool = tools != 0 + reasoning_budget_ = reasoning_budget_tokens or None + if mtype == "local_openai": + if not host_id.strip(): + return JSONResponse({"error": "Select a host for this model."}, status_code=400) + reg.save_model(username, model_id, host_id, label, model_name, context_k, tag_list, + max_rounds=max_rounds_, tools=tools_bool, + reasoning_budget_tokens=reasoning_budget_) + elif mtype == "gemini_api": + reg.save_cloud_model(username, model_id, "google", model_name, label, + account_id=account_id or None, context_k=context_k, tags=tag_list, + max_rounds=max_rounds_, tools=tools_bool) + elif mtype in ("claude_cli", "anthropic_api"): + reg.save_cloud_model(username, model_id, "anthropic", model_name, label, + credential_id=credential_id or "cli", context_k=context_k, tags=tag_list, + max_rounds=max_rounds_, tools=tools_bool) + else: + return JSONResponse({"error": f"Unknown model type: {mtype}"}, status_code=400) + display = label.strip() or model_name.strip() + logger.info("model edited (ajax): %s / %s (%s)", username, display, mtype) + return JSONResponse({"ok": True, "label": display, "model_name": model_name.strip()}) + + @router.post("/api/models/role") async def set_role(request: Request) -> JSONResponse: """AJAX: assign a model to a role priority slot. diff --git a/cortex/static/HELP.md b/cortex/static/HELP.md index 64e6989..e2553d8 100644 --- a/cortex/static/HELP.md +++ b/cortex/static/HELP.md @@ -6,7 +6,7 @@ and are appended automatically by help.html when present. --> -*Last updated: 2026-05-13* +*Last updated: 2026-06-18* --- @@ -44,7 +44,7 @@ The **Context & Memory** panel (sliders icon with tier number) contains all conf | **Memory Layers** | Toggle Long / Mid / Short memory on/off | | **Distill Memory** | Manually trigger Short / Mid / Long / All distillation | | **Model** | Active chat model — click to cycle through your configured slot models (Primary → Backup 1 → …) | -| **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L** cycles input area height · **⌃↵** toggles send shortcut | +| **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L/XL** cycles input area height · **⌃↵** toggles send shortcut | All settings persist in `localStorage` across page refreshes. @@ -74,7 +74,7 @@ The orchestrator runs a multi-step tool loop: 3. The model produces the final user-facing reply — when the orchestrator role uses Gemini, Claude writes the final response; when it uses a local model, that same model writes it 4. Expandable tool-call cards appear above the response — click any card to see the arguments sent and the result returned -The ⚡ toggle is **independent of the Role selector** — you can use any role (chat, coder, research, etc.) with or without tools. The orchestrator model is configured in **Account → Model Registry → Role Assignments → Orchestrator**. +The ⚡ toggle routes requests through the **Orchestrator** role model regardless of which chat model is active. Configure it in **Account → Model Registry → Role Assignments → Orchestrator**. Tools mode is best for tasks requiring research, multi-step reasoning, or side effects (e.g. "search for X", "add a task", "what's on my list?", "append this to my journal"). Regular chat is faster for conversational turns. @@ -156,7 +156,7 @@ Once installed, opening Cortex from the home screen or app launcher skips the br ## Switching Models -The **Model** button in the Context & Memory panel cycles through the slot models configured for your active role (Primary → Backup 1). Click it to switch between models mid-session. +The **Model** button in the Context & Memory panel cycles through the slot models configured for your **Chat** role (Primary → Backup 1). Click it to switch between models mid-session. - The button label shows the active model (e.g. "GPT-4o", "Gemini 2.5 Flash") - The selected slot is sent with each chat request so the correct model is used @@ -205,12 +205,11 @@ The table shows all-time totals per model key, with columns for: Values ≥ 1,000 are displayed as `k` (e.g. `24.3k`). -**What is and isn't tracked:** +**What is tracked:** -- ✅ Gemini API calls (orchestrator, distillation) +- ✅ Anthropic API calls (direct SDK) - ✅ Local OpenAI-compatible calls (Open WebUI, Ollama, OpenRouter) -- ✗ Claude CLI — no structured token data is returned by the subprocess -- ✗ Gemini CLI — same reason +- ✅ Gemini API calls (orchestrator, distillation) The raw data lives in `home/{username}/usage.json` and is also accessible via the Files panel or the API. @@ -230,9 +229,10 @@ Configure which AI models are available and which handles each task type. Do this before adding models — models need a provider account or local host to attach to. -**Anthropic (Claude):** Two options: -- **CLI (OAuth):** Nothing to configure — uses your existing `claude auth login` session. If Claude isn't working, run `claude auth login` in a terminal. -- **Direct API key:** Scroll to **Cloud Providers → Anthropic** → click **+ Add API key**. Enter a label and your `sk-ant-…` key from [console.anthropic.com/keys](https://console.anthropic.com/keys). When you add a model using an API key credential, it routes through the Anthropic SDK instead of the CLI. +**Anthropic (Claude):** Uses a direct API key — no Claude CLI required: +- Scroll to **Cloud Providers → Anthropic** → click **+ Add API key** +- Enter a label and your `sk-ant-…` key from [console.anthropic.com/keys](https://console.anthropic.com/keys) +- Models added with this credential call the Anthropic API directly via the SDK **Google (Gemini):** Add one entry per API key you want to use: 1. Scroll to **Cloud Providers → Google** → click **+ Add Google account** @@ -261,7 +261,7 @@ Scroll to **Add Model**. Select the provider tab, fill in the details, click **A |---|---| | **Local** | Select a host (from Step 1) → enter model name, or use **Fetch from host** to pick from a live list | | **Google** | Select a Gemini model from the catalog → select a Google account (from Step 1) | -| **Anthropic** | Select a credential (CLI OAuth or an API key added in Step 1) → select a Claude model from the catalog | +| **Anthropic** | Select an API key credential (from Step 1) → select a Claude model from the catalog | The label and context window size auto-fill from the catalog — edit them if you want. Tags are optional. @@ -286,7 +286,7 @@ Scroll to **Role Assignments** at the bottom of the page. Each role has **Primar | **Coder** | Code-focused tasks — larger context window, code-aware model | | **Research** | Long-context research — high-token model, web tools prioritized | -Switch roles via the **Role** selector in the Context & Memory panel (⚙). Leave all slots empty to use the server default. +Leave all slots empty to use the server default. **Per-role tool sets:** Expand any role card to configure which tool categories the orchestrator can use when that role is active. Unchecked categories are hidden from the model entirely — reducing token overhead on every orchestrated call. Leaving all categories unchecked means all tools the user has access to are available (the default). @@ -390,6 +390,7 @@ Distillation builds up the memory layers from raw session logs. Runs automatical | **mid** | LLM summarizes `MEMORY_SHORT.md` → `MEMORY_MID.md` | | **long** | LLM integrates `MEMORY_MID.md` → `MEMORY_LONG.md` | | **all** | Runs short → mid → long in sequence | +| **Rebuild** | ⚠ Wipes Mid + Long memories and rebuilds from session logs. Use to recover from distillation drift. Hand-edited content will be replaced. | **Recommended workflow:** run **short** after any productive session; **mid** weekly; **long** monthly. @@ -462,8 +463,7 @@ For direct access or scripting: | Method | Endpoint | Description | |---|---|---| | `POST` | `/chat` | Send a message — returns SSE stream | -| `GET` | `/backend` | Get current primary/fallback backends | -| `POST` | `/backend` | Set primary backend (`{"primary": "claude"}`) | +| `GET` | `/backend` | Get configured model slots and orchestrator | | `GET` | `/sessions` | List all sessions | | `GET` | `/history/{id}` | Get session message history | | `PUT` | `/history/{id}` | Replace full session history | diff --git a/cortex/static/app.js b/cortex/static/app.js index 546d5e9..9c75938 100644 --- a/cortex/static/app.js +++ b/cortex/static/app.js @@ -140,15 +140,16 @@ }); // ── Textarea height ────────────────────────────────────────── - const HEIGHT_SIZES = [120, 240, 480]; - const HEIGHT_LABELS = ['S', 'M', 'L']; + const HEIGHT_SIZES = [120, 240, 480, 720]; + const HEIGHT_LABELS = ['S', 'M', 'L', 'XL']; const HEIGHT_TITLES = [ 'Input size: Compact — click to cycle', 'Input size: Medium — click to cycle', 'Input size: Large — click to cycle', + 'Input size: Extra Large — click to cycle', ]; - let maxHeight = parseInt(localStorage.getItem('maxHeight') || '120'); + let maxHeight = parseInt(localStorage.getItem('maxHeight') || '240'); const heightCycleBtn = document.getElementById('height-cycle-btn'); function syncHeight() { diff --git a/cortex/static/index.html b/cortex/static/index.html index c97ffab..c035971 100644 --- a/cortex/static/index.html +++ b/cortex/static/index.html @@ -115,9 +115,9 @@
-
Role
+
Model
- +
@@ -167,24 +167,6 @@
- -
- - -
- - - - - - - -
-
+ +
+
+ + +
+
+ + + + + + + +
diff --git a/cortex/static/local_llm.html b/cortex/static/local_llm.html index 1177437..90f0bf5 100644 --- a/cortex/static/local_llm.html +++ b/cortex/static/local_llm.html @@ -982,6 +982,42 @@ }); }); + // ── Model edit: AJAX save (stay on Models tab) ──────────────────────────── + document.querySelectorAll('.model-edit-form').forEach(form => { + form.addEventListener('submit', async e => { + e.preventDefault(); + const id = form.id.replace('edit-form-', ''); + const saveBtn = form.querySelector('button[type="submit"]'); + saveBtn.disabled = true; + try { + const res = await fetch(`/api/models/${id}/edit`, {method: 'POST', body: new FormData(form)}); + const data = await res.json(); + if (data.ok) { + // Update the row header label in place + const row = document.getElementById('model-' + id); + if (row && data.label) { + const labelEl = row.querySelector('.model-label'); + if (labelEl) labelEl.textContent = data.label; + } + if (row && data.model_name) { + const nameEl = row.querySelector('.model-name'); + if (nameEl) nameEl.textContent = data.model_name; + } + // Close the edit panel + form.style.display = 'none'; + document.querySelector(`.model-edit-btn[data-id="${id}"]`).textContent = 'Edit'; + showToast('Model saved'); + } else { + showToast(data.error || 'Save failed', true); + } + } catch (err) { + showToast(err.message, true); + } finally { + saveBtn.disabled = false; + } + }); + }); + // ── Edit form: fetch from host ──────────────────────────────────────────── document.querySelectorAll('.edit-fetch-btn').forEach(btn => { btn.addEventListener('click', async () => { diff --git a/cortex/static/style.css b/cortex/static/style.css index 0f2f0bc..a3429ca 100644 --- a/cortex/static/style.css +++ b/cortex/static/style.css @@ -735,35 +735,28 @@ .message.note-private .note-content { color: #c9a84c; white-space: pre-wrap; } .message.note-public .note-content { color: #4abfb0; white-space: pre-wrap; } - /* ── Input area — 3-col: [mode-toggle] [textarea] [send-col] ── */ + /* ── Input area — column: [attachment?] [textarea] [toolbar] ── */ #input-area { - padding: 12px 20px; + padding: 10px 20px 12px; background: var(--surface); border-top: 1px solid var(--border); display: flex; - flex-direction: row; - gap: 10px; - align-items: flex-end; + flex-direction: column; + gap: 6px; } - /* ── Mode select — compact dropdown ─────────────────────────── */ + /* ── Compact toolbar below the textarea ─────────────────────── */ + #input-toolbar { + display: flex; + flex-direction: row; + align-items: center; + gap: 6px; + } + + /* ── Mode select — positioned container for dropdown only ────── */ #mode-select { position: relative; flex-shrink: 0; - display: flex; - flex-direction: column; - align-items: stretch; - gap: 4px; - } - - /* S: collapse to a single row — mode button + compact tools toggle */ - #mode-select[data-size="s"] { - flex-direction: row; - align-items: center; - } - #mode-select[data-size="s"] #tools-toggle { - padding: 3px 7px; - font-size: 0.75rem; } #mode-select-btn { @@ -874,8 +867,7 @@ #attach-btn:hover { color: rgba(255,255,255,0.6); border-color: rgba(255,255,255,0.25); } #attachment-row { - padding: 0.3rem 0.5rem; - border-bottom: 1px solid var(--border); + padding: 0.2rem 0; } #attachment-preview { display: inline-flex; @@ -914,7 +906,8 @@ #attachment-clear:hover { color: var(--text); } #input { - flex: 1; + width: 100%; + box-sizing: border-box; background: var(--bg); border: 1px solid var(--border); border-radius: 8px; @@ -936,16 +929,7 @@ #input.mode-note.public:focus { border-color: rgba(40,170,150,0.85); } #input.mode-otr { border-color: rgba(120,80,160,0.4); background: rgba(120,80,160,0.04); } - /* Send column — right side, stacked */ - #send-col { - display: flex; - flex-direction: column; - align-items: stretch; - gap: 4px; - flex-shrink: 0; - } - - /* Send button */ + /* Send button — sits in #input-toolbar row */ #send { display: flex; align-items: center; @@ -955,11 +939,12 @@ border: 1px solid var(--user-border); color: var(--text); border-radius: 8px; - padding: 10px 14px; + padding: 7px 16px; cursor: pointer; font-size: 0.9rem; text-align: center; white-space: nowrap; + flex-shrink: 0; transition: background 0.15s; } @@ -977,10 +962,11 @@ border: 1px solid var(--error-border); color: var(--error-text); border-radius: 8px; - padding: 10px 14px; + padding: 7px 14px; cursor: pointer; font-size: 0.9rem; text-align: center; + flex-shrink: 0; transition: background 0.15s; } diff --git a/cortex/tools/_projects.py b/cortex/tools/_projects.py new file mode 100644 index 0000000..e8daaae --- /dev/null +++ b/cortex/tools/_projects.py @@ -0,0 +1,31 @@ +"""Shared project alias registry for Cortex tools.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class ProjectDef: + path: str # path on the host where the project lives (~ is expanded at runtime) + ssh_host: str = "" # if set, git/aider commands run via SSH on this host + + +_CORTEX_ROOT_STR: str = str(Path(__file__).parent.parent.parent.resolve()) + +PROJECT_ALIASES: dict[str, ProjectDef] = { + "cortex": ProjectDef(path=_CORTEX_ROOT_STR), + "aether_api": ProjectDef( + path="~/OSIT_dev/aether_api_fastapi", + ssh_host="scott-wks-main-i7", + ), + "aether_frontend": ProjectDef( + path="~/OSIT_dev/aether_app_sveltekit", + ssh_host="scott-wks-main-i7", + ), + "aether_container": ProjectDef( + path="~/OSIT_dev/aether_container_env", + ssh_host="scott-wks-main-i7", + ), +} diff --git a/cortex/tools/aider.py b/cortex/tools/aider.py index 3f547ab..5176943 100644 --- a/cortex/tools/aider.py +++ b/cortex/tools/aider.py @@ -16,25 +16,16 @@ background=True runs the subprocess asynchronously and returns an agent_id immed import asyncio import logging import os +import shlex from pathlib import Path from google.genai import types import agent_manager +from ._projects import PROJECT_ALIASES logger = logging.getLogger(__name__) -_CORTEX_DIR = Path(__file__).parent # .../Cortex_and_Inara_dev/cortex/ -_PROJECT_ROOT = _CORTEX_DIR.parent # .../Cortex_and_Inara_dev/ - -# Known project aliases — expand before passing to subprocess -_PROJECT_ALIASES: dict[str, str] = { - "cortex": str(_PROJECT_ROOT), - "aether_api": "~/OSIT_dev/aether_api_fastapi", - "aether_frontend": "~/OSIT_dev/aether_app_sveltekit", - "aether_container": "~/OSIT_dev/aether_container_env", -} - _MAX_OUTPUT_CHARS = 12_000 # Maps URL fragments → Aider --api-key provider slug. @@ -192,11 +183,16 @@ async def aider_run( immediately. Use agent_status(agent_id) to check progress; set notify=True to receive a push/Talk notification on completion. """ - resolved = _PROJECT_ALIASES.get(project, project) - cwd = Path(os.path.expanduser(resolved)) + proj_def = PROJECT_ALIASES.get(project) + if proj_def is not None: + cwd = Path(os.path.expanduser(proj_def.path)) + ssh_host = proj_def.ssh_host + else: + cwd = Path(os.path.expanduser(project)) + ssh_host = "" - if not cwd.is_dir(): - return f"Error: project directory '{resolved}' does not exist." + if not ssh_host and not cwd.is_dir(): + return f"Error: project directory '{cwd}' does not exist." timeout = min(max(int(timeout), 10), 600) @@ -232,17 +228,28 @@ async def aider_run( cmd += ["--file", f] logger.info( - "aider_run: project=%s model=%s host_label=%s auto_commit=%s background=%s task=%.120s", - project, model, host_label, auto_commit, background, task, + "aider_run: project=%s ssh_host=%s model=%s host_label=%s auto_commit=%s background=%s task=%.120s", + project, ssh_host or "local", model, host_label, auto_commit, background, task, ) async def _run() -> str: - proc = await asyncio.create_subprocess_exec( - *cmd, - cwd=str(cwd), - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) + if ssh_host: + # Run aider natively on the remote host via a login shell so PATH + # includes ~/.local/bin where aider is typically installed. + inner_cmd = "cd " + shlex.quote(str(cwd)) + " && " + shlex.join(cmd) + ssh_cmd = f"bash -l -c {shlex.quote(inner_cmd)}" + proc = await asyncio.create_subprocess_exec( + "ssh", ssh_host, ssh_cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + else: + proc = await asyncio.create_subprocess_exec( + *cmd, + cwd=str(cwd), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=float(timeout)) out = stdout.decode(errors="replace").strip() @@ -323,6 +330,8 @@ DECLARATIONS = [ "Credentials are resolved automatically from the Cortex model registry — " "OpenRouter, local Open WebUI/Ollama, Anthropic API, and other configured hosts " "are all supported. Use host_label to pick a specific host. " + "aether_api, aether_frontend, and aether_container run aider natively on the " + "workstation (scott-wks-main-i7) via SSH — aider must be installed there. " "Set background=True for long tasks — returns an agent_id immediately and sends " "a notification when done. ADMIN ONLY. Requires confirmation." ), diff --git a/cortex/tools/git.py b/cortex/tools/git.py index a0674fa..e2034c0 100644 --- a/cortex/tools/git.py +++ b/cortex/tools/git.py @@ -13,26 +13,23 @@ Write operations (admin-only, confirm-required): All tools accept an optional `project` parameter using the same aliases as aider_run: "cortex" (default), "aether_api", "aether_frontend", "aether_container" Or pass an absolute path directly. + +Projects with an ssh_host defined in _projects.py run all git commands on the remote +host via SSH, using shlex-quoted commands to handle paths and arguments safely. """ import asyncio import logging import os +import shlex from pathlib import Path from google.genai import types +from ._projects import PROJECT_ALIASES + logger = logging.getLogger(__name__) -_CORTEX_ROOT: Path = Path(__file__).parent.parent.parent.resolve() - -_PROJECT_ALIASES: dict[str, str] = { - "cortex": str(_CORTEX_ROOT), - "aether_api": "~/OSIT_dev/aether_api_fastapi", - "aether_frontend": "~/OSIT_dev/aether_app_sveltekit", - "aether_container": "~/OSIT_dev/aether_container_env", -} - _MAX_OUTPUT = 50_000 _PROJECT_PARAM = types.Schema( @@ -45,21 +42,34 @@ _PROJECT_PARAM = types.Schema( ) -def _resolve_project(project: str) -> Path: - """Resolve a project alias or path string to an absolute Path.""" +def _resolve_project(project: str) -> tuple[Path, str]: + """Return (path, ssh_host). path may not exist locally when ssh_host is set.""" if not project: - return _CORTEX_ROOT - resolved = _PROJECT_ALIASES.get(project, project) - return Path(os.path.expanduser(resolved)) + d = PROJECT_ALIASES["cortex"] + else: + d = PROJECT_ALIASES.get(project) + if d is None: + # Raw path — no SSH routing + return Path(os.path.expanduser(project)), "" + return Path(os.path.expanduser(d.path)), d.ssh_host -async def _git(*args: str, cwd: Path, timeout: int = 15) -> tuple[int, str]: - """Run a git command in cwd. Returns (returncode, combined output).""" - proc = await asyncio.create_subprocess_exec( - "git", "-C", str(cwd), *args, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) +async def _git(*args: str, cwd: Path, ssh_host: str = "", timeout: int = 15) -> tuple[int, str]: + """Run a git command locally or via SSH. Returns (returncode, combined output).""" + if ssh_host: + # Build a single shell-safe command string for the remote shell + remote_cmd = shlex.join(["git", "-C", str(cwd)] + list(args)) + proc = await asyncio.create_subprocess_exec( + "ssh", ssh_host, remote_cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + else: + proc = await asyncio.create_subprocess_exec( + "git", "-C", str(cwd), *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) try: stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout) except asyncio.TimeoutError: @@ -80,10 +90,10 @@ def _cap(text: str) -> str: async def git_status(project: str = "") -> str: """Return the working tree status for a project.""" - cwd = _resolve_project(project) - if not cwd.is_dir(): + cwd, ssh_host = _resolve_project(project) + if not ssh_host and not cwd.is_dir(): return f"Error: project directory not found: {cwd}" - rc, out = await _git("status", cwd=cwd) + rc, out = await _git("status", cwd=cwd, ssh_host=ssh_host) if rc != 0: return f"git status failed: {out}" return out or "Working tree clean — nothing to report." @@ -91,8 +101,8 @@ async def git_status(project: str = "") -> str: async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: str = "") -> str: """Return recent commit history for a project.""" - cwd = _resolve_project(project) - if not cwd.is_dir(): + cwd, ssh_host = _resolve_project(project) + if not ssh_host and not cwd.is_dir(): return f"Error: project directory not found: {cwd}" args = ["log"] if oneline: @@ -102,7 +112,7 @@ async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: st args += [f"-{max(1, min(n, 200))}"] if path: args += ["--", path] - rc, out = await _git(*args, cwd=cwd) + rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host) if rc != 0: return f"git log failed: {out}" return _cap(out) or "No commits found." @@ -110,8 +120,8 @@ async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: st async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only: bool = False, project: str = "") -> str: """Show a diff for a project. Defaults to working tree vs HEAD.""" - cwd = _resolve_project(project) - if not cwd.is_dir(): + cwd, ssh_host = _resolve_project(project) + if not ssh_host and not cwd.is_dir(): return f"Error: project directory not found: {cwd}" args = ["diff"] if stat_only: @@ -122,7 +132,7 @@ async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only: args += [ref_a] if path: args += ["--", path] - rc, out = await _git(*args, cwd=cwd) + rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host) # diff exits 1 when differences exist — normal if rc not in (0, 1): return f"git diff failed: {out}" @@ -133,29 +143,27 @@ async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only: async def git_commit(message: str, project: str = "", files: list[str] | None = None) -> str: """Stage files and create a commit in a project.""" - cwd = _resolve_project(project) - if not cwd.is_dir(): + cwd, ssh_host = _resolve_project(project) + if not ssh_host and not cwd.is_dir(): return f"Error: project directory not found: {cwd}" if not message.strip(): return "Error: commit message is required." - # Stage specified files or all changes if files: for f in files: - rc, out = await _git("add", "--", f, cwd=cwd) + rc, out = await _git("add", "--", f, cwd=cwd, ssh_host=ssh_host) if rc != 0: return f"git add '{f}' failed: {out}" else: - rc, out = await _git("add", "-A", cwd=cwd) + rc, out = await _git("add", "-A", cwd=cwd, ssh_host=ssh_host) if rc != 0: return f"git add -A failed: {out}" - # Check that something is actually staged - rc, staged = await _git("diff", "--cached", "--stat", cwd=cwd) + rc, staged = await _git("diff", "--cached", "--stat", cwd=cwd, ssh_host=ssh_host) if not staged.strip(): return "Nothing staged to commit — working tree already clean." - rc, out = await _git("commit", "-m", message, cwd=cwd) + rc, out = await _git("commit", "-m", message, cwd=cwd, ssh_host=ssh_host) if rc != 0: return f"git commit failed: {out}" return out or "Committed successfully." @@ -163,15 +171,15 @@ async def git_commit(message: str, project: str = "", files: list[str] | None = async def git_push(project: str = "", remote: str = "origin", branch: str = "") -> str: """Push the current branch to a remote.""" - cwd = _resolve_project(project) - if not cwd.is_dir(): + cwd, ssh_host = _resolve_project(project) + if not ssh_host and not cwd.is_dir(): return f"Error: project directory not found: {cwd}" args = ["push", remote] if branch: args.append(branch) - rc, out = await _git(*args, cwd=cwd, timeout=30) + rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host, timeout=30) if rc != 0: return f"git push failed: {out}" return out or f"Pushed to {remote} successfully." @@ -185,7 +193,8 @@ DECLARATIONS = [ description=( "Show the working tree status for a project: staged changes, unstaged " "modifications, and untracked files. Use before committing to see what " - "will be included. Defaults to the Cortex project." + "will be included. Defaults to the Cortex project. " + "aether_api, aether_frontend, and aether_container run on the workstation via SSH." ), parameters=types.Schema( type=types.Type.OBJECT, @@ -197,7 +206,8 @@ DECLARATIONS = [ description=( "Show recent commit history for a project. Returns commit hashes, dates, " "and messages. Use after aider_run completes to see what was committed. " - "Defaults to the Cortex project." + "Defaults to the Cortex project. " + "aether_api, aether_frontend, and aether_container run on the workstation via SSH." ), parameters=types.Schema( type=types.Type.OBJECT, @@ -226,7 +236,8 @@ DECLARATIONS = [ "With ref_a only: changes between that ref and HEAD. " "With ref_a and ref_b: changes between the two refs. " "Use after aider_run (auto_commit=False) to review changes before committing. " - "Defaults to the Cortex project." + "Defaults to the Cortex project. " + "aether_api, aether_frontend, and aether_container run on the workstation via SSH." ), parameters=types.Schema( type=types.Type.OBJECT, @@ -257,6 +268,7 @@ DECLARATIONS = [ "Stage files and create a git commit in a project. " "Use after reviewing changes with git_diff — especially when aider_run ran " "with auto_commit=False. Stages all changes by default (files=None). " + "aether_api, aether_frontend, and aether_container commit on the workstation via SSH. " "ADMIN ONLY. Requires confirmation." ), parameters=types.Schema( @@ -284,6 +296,7 @@ DECLARATIONS = [ description=( "Push the current branch to a remote. " "Use after git_commit or after aider_run commits to share the changes. " + "aether_api, aether_frontend, and aether_container push on the workstation via SSH. " "ADMIN ONLY. Requires confirmation." ), parameters=types.Schema( diff --git a/documentation/ARCH__BACKENDS.md b/documentation/ARCH__BACKENDS.md index b8706cc..6e443ff 100644 --- a/documentation/ARCH__BACKENDS.md +++ b/documentation/ARCH__BACKENDS.md @@ -1,20 +1,21 @@ # Architecture: LLM Backends > How Cortex selects and talks to AI models. -> Last updated: 2026-05-06 +> Last updated: 2026-06-18 --- ## Providers -Cortex supports four model types, each dispatched differently: +Cortex supports two model types, each dispatched differently: | Type | Auth | Use | |---|---|---| -| `claude_cli` | OAuth token from `~/.claude/.credentials.json` | Chat, persona responses | -| `gemini_cli` | Gemini CLI credentials | Chat fallback / explicit selection | -| `gemini_api` | API key from registry account or `.env` | Orchestrator tool loop | -| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, etc. | +| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, any OpenAI-compatible endpoint | +| `anthropic_api` | API key in model registry (Anthropic cloud provider) | Claude models via Anthropic SDK | + +The Gemini API (`gemini_api`) is a third type used exclusively by the orchestrator engine — +it is not dispatched through `llm_client.py` and is not available for chat/distill roles. --- @@ -22,40 +23,36 @@ Cortex supports four model types, each dispatched differently: ### Default: Role-Based Routing (Auto) -When no explicit backend is selected, Cortex routes to the model configured for the -request's **role** in the user's model registry. Roles: `chat`, `orchestrator`, `distill`, -`coder`, `research` (extensible via `DEFINED_ROLES` in `.env`). +All routing goes through the user's model registry. When a request arrives, `complete()` in +`llm_client.py` resolves the model for the given role: -Resolution order for a role: -1. User registry: `roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4` -2. `.env` role default: `ROLE_CHAT=claude_cli`, `ROLE_DISTILL=claude_cli`, etc. -3. Hardcoded last-resort: `chat/distill/coder → claude_cli`, `orchestrator/research → gemini_api` - -### Explicit Override - -The **Role** toggle in the Context & Memory panel cycles through configured role slots for the `chat` role: **Primary → Backup 1 → Backup 2 → auto**. - -- Each slot shows the configured model label -- `auto` uses the Primary without forcing a specific backend type -- The ⚡ Tools toggle is independent — it routes to the `orchestrator` role regardless of the chat role selection - -**Fallback chain** (automatic, only when no explicit registry entry exists): ``` -claude → gemini -gemini → claude -local → claude +slot specified → resolve that exact slot (primary / backup_1 / backup_2) +no slot → get_model_for_role(username, role) +no registry entry → RuntimeError: "No model configured for role '...'" ``` -When a model is explicitly configured in the registry, errors surface immediately — no silent fallback. -Each response shows a model tag (bottom-right of the message bubble) with the model label and host. +Roles: `chat`, `orchestrator`, `distill`, `janitor`, `coder`, `research` (extensible via +`DEFINED_ROLES` in `.env`). + +There is no implicit fallback to a built-in model. If no model is configured for a role, +the request fails with a clear error directing the user to `/settings/models`. + +### Explicit Slot Selection + +The **Role** toggle in the Context & Memory panel cycles through configured role slots: +**Primary → Backup 1 → auto**. Each slot resolves the configured model for that position. + +When a model is explicitly configured (via slot or registry entry), errors surface +immediately — no silent fallback to another backend. --- -## Model Registry — V2 Schema +## Model Registry Schema Per-user configuration stored in `home/{user}/model_registry.json`. -Managed at **Settings → Models** (`/settings/models`). Full provider UI coming in Phase 2. +Managed at **Settings → Models** (`/settings/models`). ```json { @@ -64,7 +61,7 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming "providers": { "anthropic": { "credentials": [ - {"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"} + {"id": "key1", "label": "My Anthropic Key", "type": "api_key", "api_key": "sk-ant-..."} ] }, "google": { @@ -77,6 +74,13 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming "hosts": [ { "id": "abc123", + "label": "OpenRouter", + "api_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-...", + "host_type": "openai" + }, + { + "id": "def456", "label": "Gaming Laptop", "api_url": "http://192.168.x.x:3000", "api_key": "", @@ -87,23 +91,22 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming "models": [ { "id": "m1", - "type": "claude_cli", - "label": "Sonnet 4.6 (CLI)", - "model_name": "claude-sonnet-4-6", - "provider": "anthropic", - "credential_id": "cli", + "type": "local_openai", + "label": "Claude Sonnet 4.6 (OpenRouter)", + "model_name": "anthropic/claude-sonnet-4-6", + "host_id": "abc123", "context_k": 200, "tags": ["chat", "persona"] }, { "id": "m2", - "type": "gemini_api", - "label": "Gemini 2.5 Flash (OSIT)", - "model_name": "gemini-2.5-flash", - "provider": "google", - "account_id": "a1b2", - "context_k": 1000, - "tags": ["orchestrator", "research"] + "type": "anthropic_api", + "label": "Claude Sonnet 4.6 (Direct)", + "model_name": "claude-sonnet-4-6", + "provider": "anthropic", + "credential_id": "key1", + "context_k": 200, + "tags": ["chat"] }, { "id": "m3", @@ -111,7 +114,7 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming "label": "Gemma 4 E4B", "model_name": "gemma4:e4b", "provider": "local", - "host_id": "abc123", + "host_id": "def456", "context_k": 72, "max_rounds": 5, "tools": true, @@ -120,8 +123,8 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming ], "roles": { - "chat": {"primary": "m1", "backup_1": "m2", "backup_2": "m3"}, - "orchestrator": {"primary": "m2", "backup_1": "m3"}, + "chat": {"primary": "m1", "backup_1": "m2"}, + "orchestrator": {"primary": "m2"}, "distill": {"primary": "m1"} } } @@ -145,52 +148,9 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming Set `api_url` to the base path before `/chat/completions`: - OpenRouter: `https://openrouter.ai/api/v1` -### Built-in model IDs - -Always resolvable without a user-created registry entry. Used as role defaults. - -| ID | Type | Notes | -|---|---|---| -| `claude_cli` | `claude_cli` | Model from `DEFAULT_MODEL` in `.env` | -| `gemini_cli` | `gemini_cli` | Gemini CLI subprocess | -| `gemini_api` | `gemini_api` | Model from `ORCHESTRATOR_MODEL` in `.env`; key from `GEMINI_API_KEY` | - -### V1 → V2 migration - -Automatic on first load. Changes: -- Adds `providers` section (Anthropic CLI credential + empty Google accounts) -- Migrates `gemini_api_key` from `auth.json` → `providers.google.accounts[0]` -- All existing hosts, models, and role assignments are preserved - --- -## Claude Backend (`_claude()`) - -Runs `claude --print --no-session-persistence --output-format text` as a subprocess. - -- System prompt passed via `--system-prompt` -- Conversation history formatted as `` block -- Token read live from `~/.claude/.credentials.json` on every call — never uses the - env var, which goes stale after `claude auth login` -- Model override via `--model` flag when `model_name` is set in the registry entry - -Timeout: `TIMEOUT_CLAUDE=60` seconds (`.env`) - ---- - -## Gemini CLI Backend (`_gemini()`) - -Runs `gemini --output-format text --extensions "" -p ` as a subprocess. - -- `--extensions ""` disables all MCP extensions — prevents child processes keeping pipes open -- `start_new_session=True` puts the process in its own group for clean `os.killpg` on timeout -- Output is cleaned to strip CLI noise (loading messages, retry notices, quota warnings) - -Timeout: `TIMEOUT_GEMINI=120` seconds (`.env`) - ---- - -## Local Backend (`_local()`) +## Local/OpenAI-Compatible Backend (`_local()`) HTTP POST to an OpenAI-compatible endpoint. Model config is resolved via the model registry. @@ -199,13 +159,36 @@ HTTP POST to an OpenAI-compatible endpoint. Model config is resolved via the mod # host_type "openai": POST {api_url}/chat/completions ``` +System prompt is sent as the first `{"role": "system", "content": "..."}` message. +Image attachments are injected into the last user message as `image_url` content blocks. +Token usage is recorded when returned by the endpoint. + +Streaming variant: `_local_streaming()` — SSE line-by-line, yields tokens via `token_sink`. + Timeout: `TIMEOUT_LOCAL=300` seconds (`.env`) — local models may need to load from disk. --- -## Gemini API (Orchestrator) +## Anthropic API Backend (`_anthropic_api()`) -Used by `orchestrator_engine.py` for the ReAct tool loop. Not used for general chat. +Direct call to the Anthropic Messages API via the `anthropic` Python SDK. + +System prompt passed as top-level `system` field. Messages stripped to `role`/`content` only. +Token usage is always recorded from `resp.usage`. + +Streaming variant: `_anthropic_api_streaming()` — uses `client.messages.stream()`, yields +tokens via `token_sink`. + +API key comes from the model registry: `providers.anthropic.credentials[n].api_key`. + +Timeout: governed by httpx defaults and the Anthropic SDK's own connection handling. + +--- + +## Gemini API (Orchestrator only) + +Used by `orchestrator_engine.py` for the ReAct tool loop. Not dispatched through +`llm_client.py` and not available for chat, distill, or other roles. API key resolution order: 1. `api_key` embedded in the resolved orchestrator model config (V2 registry with `account_id`) @@ -217,9 +200,7 @@ API key resolution order: ## Distillation Memory distillation uses `role="distill"`. Configure via Model Registry → Role Assignments. - -`.env` override: `ROLE_DISTILL=claude_cli` (default). - +Any `local_openai` or `anthropic_api` model can be assigned to the distill role. --- @@ -232,4 +213,4 @@ Memory distillation uses `role="distill"`. Configure via Model Registry → Role | `cortex/routers/local_llm.py` | Settings UI routes + `/api/models/role` AJAX | | `cortex/routers/chat.py` | `_backend_label()`, `fallback_used` flag | | `cortex/routers/orchestrator.py` | Engine selection, Gemini API key resolution | -| `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `PRIMARY_BACKEND` | +| `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `TIMEOUT_LOCAL` |