feat: SSH dev routing, model registry UX, chat input toolbar, doc sync

Backend / infrastructure:
- cortex/tools/_projects.py (new): shared project alias registry with ssh_host for workstation projects (aether_api, aether_frontend, aether_container)
- cortex/tools/git.py: all git tools route to workstation via SSH when ssh_host set
- cortex/tools/aider.py: aider_run SSH-routes to workstation using bash -l -c
- cortex/routers/local_llm.py: POST /api/models/{id}/edit AJAX endpoint — save model edits without page reload or tab reset; returns JSON {ok, label, model_name}
- cortex/llm_client.py: remove Gemini CLI and Claude CLI backends; clean up fallback chain and process group tracking (continuation of Gemini CLI removal)
- cortex/routers/auth.py: strip Claude/Gemini CLI auth status checks (CLI removed)
- cortex/routers/chat.py: remove legacy claude/gemini backend fields
- cortex/config.py: clean up CLI-related settings
- cortex/main.py: remove CLI lifecycle hooks

UI:
- cortex/static/local_llm.html: model edit forms now save via fetch() + toast; stay on Models tab; update row header label in place on success
- cortex/static/index.html: restructure input area to column layout — textarea above, compact toolbar below (Chat/Tools/Attach + Send); fixes dead space at M/L/XL sizes; context panel "Role" → "Model" section label
- cortex/static/style.css: column input-area layout; #input-toolbar; flex:1 → width:100% on textarea (fixes scrollHeight in column flex context); compact send/stop button padding
- cortex/static/app.js: add XL (720px) to height cycle; default M (240px)

Docs:
- cortex/static/HELP.md: S/M/L → S/M/L/XL; add Rebuild to distill table; fix "Role selector" references (no such UI); fix "your active role" → Chat role; fix ⚡ toggle description; Model Registry section cleanup
- documentation/ARCH__BACKENDS.md: reflect CLI removal, current backend state

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-18 22:14:07 -04:00
parent 85223326b0
commit b144d8385f
15 changed files with 378 additions and 586 deletions
--- a/cortex/config.py
+++ b/cortex/config.py
@@ -3,7 +3,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict


 class Settings(BaseSettings):
-    anthropic_api_key: str | None = None  # not used — claude CLI handles auth
+    anthropic_api_key: str | None = None  # not used — configure via model registry

    # Google OAuth — "Sign in with Google" for all users
    # Create credentials at console.cloud.google.com → APIs & Services → Credentials
@@ -38,7 +38,6 @@ class Settings(BaseSettings):
    default_model: str = "claude-sonnet-4-6"
    default_tier: int = 2
    max_history_messages: int = 40  # rolling window — 20 turns (user + assistant)
-    primary_backend: str = "claude"  # "claude" | "local" — gemini CLI removed June 2026

    # Local model backend — OpenAI-compatible API (Open WebUI / Ollama)
    # Set LOCAL_API_URL in .env to enable; leave blank to disable
@@ -46,9 +45,6 @@ class Settings(BaseSettings):
    local_api_key: str = ""            # sk-... from Open WebUI → Settings → Account → API Keys
    local_model: str = ""              # workspace or model name, e.g. test-agent-simple

-    # Per-backend timeouts in seconds
-    timeout_claude: int = 60
-    timeout_gemini: int = 120   # frequently slow under load
    timeout_local: int = 300    # local models may need to load first

    # Auto-distillation schedule — override in .env
@@ -66,14 +62,13 @@ class Settings(BaseSettings):
    distill_backend_long: str = ""

    # Model registry: default backend type per role when user registry has no entry.
-    # Values: "claude_cli" | "gemini_cli" | "gemini_api" (builtin IDs)
-    # Override in .env: ROLE_CHAT=claude_cli  ROLE_DISTILL=gemini_api  etc.
-    role_chat: str = "claude_cli"
-    role_orchestrator: str = "gemini_api"
-    role_distill: str = "claude_cli"
-    role_janitor: str = "claude_cli"   # assign a cheap/fast model: Haiku 4.5, local Gemma E4B
-    role_coder: str = "claude_cli"
-    role_research: str = "gemini_api"
+    # All roles must be configured via /settings/models — no built-in fallback.
+    role_chat: str = ""
+    role_orchestrator: str = ""
+    role_distill: str = ""
+    role_janitor: str = ""
+    role_coder: str = ""
+    role_research: str = ""

    # Comma-separated list of standard roles shown in the model settings UI.
    # Add custom roles here to extend the UI without code changes.
@@ -122,8 +117,8 @@ class Settings(BaseSettings):
        return [r.strip() for r in self.defined_roles.split(",") if r.strip()]

    def get_role_default(self, role: str) -> str:
-        """Return the .env default backend type for a role (e.g. 'claude_cli')."""
-        return getattr(self, f"role_{role.replace('-', '_')}", "claude_cli")
+        """Return the .env default backend type for a role, or '' if unconfigured."""
+        return getattr(self, f"role_{role.replace('-', '_')}", "")

    def home_root(self) -> Path:
        """Resolve home_dir relative to this file's location if not absolute."""
--- a/cortex/llm_client.py
+++ b/cortex/llm_client.py
@@ -1,50 +1,18 @@
 import asyncio
 import logging
-import os
-import signal
-import subprocess
 from config import settings
-import event_bus

 logger = logging.getLogger(__name__)

-# Track active Gemini process group IDs so we can kill them on shutdown
-_active_pgroups: set[int] = set()
-
-
-def _register_pgroup(pid: int) -> None:
-    _active_pgroups.add(pid)
-
-
-def _unregister_pgroup(pid: int) -> None:
-    _active_pgroups.discard(pid)
-
-
-async def cleanup() -> None:
-    """Kill any lingering Gemini process groups. Call from lifespan shutdown."""
-    for pid in list(_active_pgroups):
-        try:
-            os.killpg(pid, signal.SIGKILL)
-            logger.info("Shutdown: killed Gemini process group %d", pid)
-        except ProcessLookupError:
-            pass
-    _active_pgroups.clear()
-
-
-# Map from registry model type → dispatch function key
 _TYPE_TO_BACKEND = {
-    "claude_cli":    "claude",
-    "gemini_cli":    "gemini",   # Gemini CLI is being replaced by Antigravity CLI (June 2026)
-    "gemini_api":    "gemini",   # routes to CLI subprocess — no users configured; kept for compat
    "local_openai":  "local",
    "anthropic_api": "anthropic_api",
 }

-# Explicit UI toggle values (kept for backward compat)
-_EXPLICIT_BACKENDS = ("claude", "gemini", "local")
-# Gemini CLI removed from the claude fallback — it's shutting down June 18 2026.
-# claude failures now surface directly; gemini backend still falls back to claude.
-_FALLBACK: dict[str, str | None] = {"claude": None, "gemini": "claude", "local": "claude", "anthropic_api": "claude"}
+_FALLBACK: dict[str, str | None] = {
+    "local":         None,
+    "anthropic_api": None,
+}


 async def complete(
@@ -55,16 +23,15 @@ async def complete(
    slot: str | None = None,
    max_tokens: int = 2048,
    attachment: dict | None = None,
-    token_sink=None,  # async (str) -> None; if set, stream tokens as they arrive
+    token_sink=None,
 ) -> tuple[str, str]:
    """
    Returns (response_text, actual_backend_used).

-    slot:  Phase 3 — specific role slot ("primary" | "backup_1" | "backup_2").
-           Resolves that exact slot, no fallback chain. Takes priority over model.
-    model: legacy backend override ("claude" | "gemini" | "local") from old toggle.
-           None = resolve via model registry for the given role.
-    role:  registry role used for slot/auto routing (default: "chat").
+    slot:  explicit role slot ("primary" | "backup_1" | "backup_2").
+           Resolves that exact slot, no fallback chain. Takes priority over role.
+    role:  registry role used for auto routing (default: "chat").
+    model: ignored — kept for API compatibility; routing is via slot/role only.
    """
    import model_registry as _reg
    from persona import _user
@@ -73,46 +40,33 @@ async def complete(
    resolved_cfg: dict | None = None

    if slot is not None:
-        # Phase 3: explicit slot selection — no fallback within the role
        resolved_cfg = _reg.get_model_for_slot(username, role, slot)
        if resolved_cfg:
-            primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "claude")
+            primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "local")
        else:
-            # Slot not configured — fall through to auto routing
            slot = None

    if slot is None:
-        if model in _EXPLICIT_BACKENDS:
-            # Legacy: explicit backend override from old UI toggle
-            if model == "local":
-                resolved_cfg = _reg.get_best_local_model(username, role)
-                if not resolved_cfg:
-                    raise RuntimeError("No local model configured — add one at /settings/models")
-            primary = model
-        else:
-            # Auto: role-based routing via model registry
        resolved = _reg.get_model_for_role(username, role)
        if resolved:
            resolved_cfg = resolved
-                primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude")
+            primary = _TYPE_TO_BACKEND.get(resolved["type"], "local")
        else:
-                primary = settings.primary_backend
+            raise RuntimeError(
+                f"No model configured for role '{role}'. "
+                "Add one at /settings/models."
+            )

-    fallback = _FALLBACK.get(primary, "claude")
+    fallback = _FALLBACK.get(primary)

    try:
        response = await _dispatch(primary, system_prompt, messages, resolved_cfg,
                                   attachment=attachment, token_sink=token_sink)
        return response, primary
    except Exception as e:
-        err_str = str(e)
-        if primary == "claude" and any(k in err_str for k in ("401", "authenticate", "expired", "OAuth")):
-            await event_bus.publish({"type": "claude_auth_expired"})
-        # Surface errors when a model is explicitly configured or a specific slot was pinned.
        if resolved_cfg is not None:
            logger.error("%s failed (no fallback — model explicitly configured): %s", primary, e)
            raise
-        # No fallback defined for this backend — surface the error directly.
        if not fallback:
            logger.error("%s failed (no fallback configured): %s", primary, e)
            raise
@@ -129,9 +83,7 @@ async def _dispatch(
    attachment: dict | None = None,
    token_sink=None,
 ) -> str:
-    if backend == "gemini":
-        text = await _gemini(system_prompt, messages)
-    elif backend == "local":
+    if backend == "local":
        if token_sink:
            return await _local_streaming(token_sink, system_prompt, messages, model_cfg)
        text = await _local(system_prompt, messages, model_cfg, attachment=attachment)
@@ -140,55 +92,12 @@ async def _dispatch(
            return await _anthropic_api_streaming(token_sink, system_prompt, messages, model_cfg)
        text = await _anthropic_api(system_prompt, messages, model_cfg)
    else:
-        text = await _claude(system_prompt, messages, model_cfg)
-    # For non-streaming backends when token_sink is provided, emit the full text as one chunk.
+        raise RuntimeError(f"Unknown backend '{backend}' — check model type in registry")
    if token_sink and text:
        await token_sink(text)
    return text


-def _fresh_claude_token() -> str | None:
-    """Read the current OAuth access token from the Claude credentials file.
-
-    The token in the systemd .env goes stale (it rotates on each login).
-    Reading directly from ~/.claude/.credentials.json always gets the latest.
-    """
-    import json as _json
-    creds_path = os.path.expanduser("~/.claude/.credentials.json")
-    try:
-        with open(creds_path) as f:
-            data = _json.load(f)
-        return data["claudeAiOauth"]["accessToken"]
-    except Exception as e:
-        logger.debug("Could not read Claude credentials file: %s", e)
-        return None
-
-
-async def _claude(system_prompt: str, messages: list[dict], model_cfg: dict | None) -> str:
-    model_name = (model_cfg or {}).get("model_name") if model_cfg else None
-    cmd = [
-        "claude", "--print",
-        "--no-session-persistence",
-        "--output-format", "text",
-    ]
-    # Only pass --model if it's a real model name (not a backend type string)
-    if model_name and model_name not in ("claude", "gemini", "local", ""):
-        cmd.extend(["--model", model_name])
-    if system_prompt:
-        cmd.extend(["--system-prompt", system_prompt])
-    cmd.append(_build_conversation(messages))
-
-    # Always use the freshest token from the credentials file so the systemd
-    # service doesn't break when the env-var token rotates after a login.
-    env = os.environ.copy()
-    token = _fresh_claude_token()
-    if token:
-        env["CLAUDE_CODE_OAUTH_TOKEN"] = token
-        env.pop("ANTHROPIC_API_KEY", None)  # never let a stale API key override OAuth
-
-    return await _run(cmd, timeout=settings.timeout_claude, env=env)
-
-
 async def _local(
    system_prompt: str,
    messages: list[dict],
@@ -413,106 +322,3 @@ async def _local_streaming(
    return full_text.strip()


-async def _gemini(system_prompt: str, messages: list[dict]) -> str:
-    # Gemini CLI spawns MCP child processes that keep stdout pipes open after responding.
-    # start_new_session=True puts the whole tree in its own process group so
-    # os.killpg kills everything at once on timeout.
-    cmd = [
-        "gemini",
-        "--output-format", "text",
-        "--extensions", "",   # disable all extensions — prevents MCP child processes
-        "-p", _build_prompt(system_prompt, messages),
-    ]
-
-    try:
-        proc = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-            start_new_session=True,
-        )
-    except FileNotFoundError:
-        raise RuntimeError("gemini not found in PATH")
-
-    _register_pgroup(proc.pid)
-    timeout = settings.timeout_gemini
-    try:
-        stdout_bytes, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout)
-        raw = stdout_bytes.decode()
-    except asyncio.TimeoutError:
-        try:
-            os.killpg(proc.pid, signal.SIGKILL)
-        except ProcessLookupError:
-            pass
-        raise RuntimeError(f"Gemini timed out after {timeout}s")
-    except asyncio.CancelledError:
-        try:
-            os.killpg(proc.pid, signal.SIGKILL)
-        except ProcessLookupError:
-            pass
-        raise
-    finally:
-        _unregister_pgroup(proc.pid)
-
-    clean = _clean_gemini_output(raw)
-    if not clean:
-        raise RuntimeError("Gemini returned an empty response")
-    return clean
-
-
-# Lines Gemini CLI writes to stdout that are not part of the actual response
-_GEMINI_NOISE = (
-    "Loaded cached credentials",
-    "Loading extension:",
-    "Server '",
-    "Listening for",
-    "Model is overloaded",
-    "High demand",
-    "Retrying",
-    "retrying",
-    "429",
-    "quota",
-)
-
-
-def _clean_gemini_output(text: str) -> str:
-    lines = [
-        line for line in text.splitlines()
-        if not any(line.strip().startswith(p) for p in _GEMINI_NOISE)
-    ]
-    return "\n".join(lines).strip()
-
-
-async def _run(cmd: list[str], timeout: int = 60, env: dict | None = None) -> str:
-    loop = asyncio.get_running_loop()
-    result = await loop.run_in_executor(
-        None,
-        lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, env=env),
-    )
-    if result.returncode != 0:
-        detail = result.stderr.strip() or result.stdout.strip() or f"exit code {result.returncode}"
-        raise RuntimeError(f"{cmd[0]} failed: {detail}")
-    return result.stdout.strip()
-
-
-def _build_conversation(messages: list[dict]) -> str:
-    """Conversation only — used for Claude (system prompt passed separately)."""
-    parts = []
-    prior = messages[:-1]
-    if prior:
-        history_lines = []
-        for msg in prior:
-            label = settings.user_name if msg["role"] == "user" else settings.agent_name
-            history_lines.append(f"{label}: {msg['content']}")
-        parts.append("<conversation>\n" + "\n\n".join(history_lines) + "\n</conversation>")
-    parts.append(messages[-1]["content"] if messages else "")
-    return "\n\n".join(parts)
-
-
-def _build_prompt(system_prompt: str, messages: list[dict]) -> str:
-    """Full prompt with system context embedded — used for Gemini."""
-    parts = []
-    if system_prompt:
-        parts.append(f"<system>\n{system_prompt}\n</system>")
-    parts.append(_build_conversation(messages))
-    return "\n\n".join(parts)
--- a/cortex/main.py
+++ b/cortex/main.py
@@ -18,8 +18,6 @@ async def lifespan(app: FastAPI):
    scheduler.start()
    yield
    scheduler.stop()
-    from llm_client import cleanup
-    await cleanup()


 app = FastAPI(title="Cortex Dispatcher", lifespan=lifespan)
--- a/cortex/routers/auth.py
+++ b/cortex/routers/auth.py
@@ -1,76 +1,12 @@
 """
-CLI auth status for both Claude and Gemini backends.
-
-GET /auth/status  — returns per-backend auth info and warning flags
-
-Claude:  warns when OAuth token is < WARN_HOURS from expiry (requires
-         user to re-run `claude` to refresh via browser flow).
-Gemini:  warns only when oauth_creds.json is missing or has no
-         refresh_token (access token rotates automatically every ~1h).
+GET /auth/status — returns connectivity status for configured model backends.
 """
-import json
 import logging
-from datetime import datetime, timezone
-from pathlib import Path
 from fastapi import APIRouter
-from config import settings

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/auth")

-CLAUDE_CREDS  = Path.home() / ".claude" / ".credentials.json"
-GEMINI_CREDS  = Path.home() / ".gemini" / "oauth_creds.json"
-GEMINI_ACCTS  = Path.home() / ".gemini" / "google_accounts.json"
-WARN_HOURS = 24          # no refresh token — warn a day ahead
-WARN_HOURS_REFRESH = 1  # refresh token present — only warn if CLI hasn't rotated in time
-
-
-def _claude_status() -> dict:
-    try:
-        data = json.loads(CLAUDE_CREDS.read_text())
-        oauth = data["claudeAiOauth"]
-        has_refresh = bool(oauth.get("refreshToken"))
-        expires_dt = datetime.fromtimestamp(oauth["expiresAt"] / 1000, tz=timezone.utc)
-        now = datetime.now(tz=timezone.utc)
-        hours_remaining = (expires_dt - now).total_seconds() / 3600
-        # When a refresh token is present the CLI *should* auto-rotate the access
-        # token, but sometimes it doesn't.  Use a tight 1-hour window so a fresh
-        # 8-hour token doesn't immediately trigger a warning, but a stale token
-        # that the CLI missed will still surface before it expires.
-        expired = hours_remaining <= 0
-        threshold = WARN_HOURS_REFRESH if has_refresh else WARN_HOURS
-        warning = expired or hours_remaining < threshold
-        return {
-            "ok": True,
-            "has_refresh_token": has_refresh,
-            "access_token_expires_at": expires_dt.isoformat(),
-            "access_token_hours_remaining": round(hours_remaining, 1),
-            "warning": warning,
-            "expired": expired,
-        }
-    except Exception as e:
-        logger.warning("claude auth check failed: %s", e)
-        return {"ok": False, "error": str(e), "warning": True, "expired": False}
-
-
-def _gemini_status() -> dict:
-    try:
-        creds = json.loads(GEMINI_CREDS.read_text())
-        if not creds.get("refresh_token"):
-            return {"ok": True, "authenticated": False, "warning": True, "account": None}
-        account = None
-        try:
-            accts = json.loads(GEMINI_ACCTS.read_text())
-            account = accts.get("active")
-        except Exception:
-            pass
-        return {"ok": True, "authenticated": True, "warning": False, "account": account}
-    except FileNotFoundError:
-        return {"ok": True, "authenticated": False, "warning": True, "account": None}
-    except Exception as e:
-        logger.warning("gemini auth check failed: %s", e)
-        return {"ok": False, "error": str(e), "warning": True, "authenticated": False}
-

 async def _local_status(username: str = "scott") -> dict:
    """Check reachability of the user's configured local model host."""
@@ -104,7 +40,5 @@ async def _local_status(username: str = "scott") -> dict:
@router.get("/status")
 async def auth_status() -> dict:
    return {
-        "claude": _claude_status(),
-        "gemini": _gemini_status(),
        "local": await _local_status(),
    }
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -21,11 +21,7 @@ router = APIRouter()


 def _backend_label(backend: str, username: str, role: str = "chat") -> str:
-    """Human-readable label for the model that handled a request (legacy path)."""
-    if backend == "claude":
-        return "Claude"
-    if backend == "gemini":
-        return "Gemini"
+    """Human-readable label for the model that handled a request."""
    if backend == "local":
        cfg = model_registry.get_best_local_model(username, role)
        if cfg:
@@ -52,7 +48,7 @@ class ChatRequest(BaseModel):
    message: str
    session_id: str | None = None
    tier: int | None = None
-    model: str | None = None        # legacy backend override ("claude"|"gemini"|"local")
+    model: str | None = None        # ignored — kept for API compatibility
    slot: str | None = None         # Phase 3: explicit slot ("primary"|"backup_1"|"backup_2")
    chat_role: str = "chat"         # active role: "chat"|"coder"|"research"|"distill" etc.
    include_long: bool = True
@@ -64,10 +60,6 @@ class ChatRequest(BaseModel):
    attachment: Attachment | None = None  # image attachment (text files injected client-side)


-class BackendRequest(BaseModel):
-    primary: str  # "claude", "gemini", or "local"
-
-
 class NoteRequest(BaseModel):
    session_id: str
    note: str
@@ -183,9 +175,6 @@ async def _stream_chat(req: ChatRequest):
            yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"

    finally:
-        # Ensure the LLM task is cancelled if the generator is torn down
-        # (e.g. client disconnect or server shutdown). This propagates
-        # CancelledError into _gemini() which kills the process group.
        if not task.done():
            task.cancel()
            try:
@@ -203,10 +192,6 @@ async def chat(req: ChatRequest) -> StreamingResponse:
    )


-_BACKEND_CYCLE = ("claude", "gemini", "local")
-_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
-
-
 def _request_user(request: Request) -> str | None:
    """Extract username from JWT cookie, or None."""
    try:
@@ -216,20 +201,6 @@ def _request_user(request: Request) -> str | None:
        return None


-def _local_model_info(request: Request) -> dict | None:
-    """Return the best local model {label, model_name} for the session user, or None."""
-    username = _request_user(request)
-    if not username:
-        return None
-    try:
-        cfg = model_registry.get_best_local_model(username, "chat")
-        if cfg:
-            return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")}
-    except Exception:
-        pass
-    return None
-
-
 def _chat_slot_models(username: str) -> list[dict]:
    """Return [{slot, label, type}] for each configured slot in the chat role, primary first."""
    registry = model_registry.get_registry(username)
@@ -279,7 +250,6 @@ async def get_backend(request: Request) -> dict:
    username        = _request_user(request)
    chat_models     = _chat_slot_models(username) if username else []
    available_roles = _available_roles_for_toggle(username) if username else []
-    p = settings.primary_backend

    orch_label = None
    if username:
@@ -288,25 +258,9 @@ async def get_backend(request: Request) -> dict:
            orch_label = orch_cfg.get("label") or orch_cfg.get("model_name") or None

    return {
-        "chat_models":        chat_models,       # Phase 3: [{slot, label, type}] for chat-role slots
-        "available_roles":    available_roles,    # kept for banner + backward compat
+        "chat_models":        chat_models,
+        "available_roles":    available_roles,
        "orchestrator_model": orch_label,
-        # Legacy fields kept for backward compat
-        "primary":     p,
-        "fallback":    _BACKEND_FALLBACK.get(p, "claude"),
-        "local_model": _local_model_info(request),
-    }
-
-
-@router.post("/backend")
-async def set_backend(req: BackendRequest, request: Request) -> dict:
-    if req.primary not in _BACKEND_CYCLE:
-        raise HTTPException(status_code=400, detail="primary must be 'claude', 'gemini', or 'local'")
-    settings.primary_backend = req.primary
-    return {
-        "primary":     req.primary,
-        "fallback":    _BACKEND_FALLBACK[req.primary],
-        "local_model": _local_model_info(request),
    }


--- a/cortex/routers/local_llm.py
+++ b/cortex/routers/local_llm.py
@@ -744,6 +744,53 @@ async def remove_custom_role_route(
    return RedirectResponse("/settings/models#roles", status_code=303)


+@router.post("/api/models/{model_id}/edit")
+async def edit_model_ajax(
+    request:                 Request,
+    model_id:                str,
+    mtype:                   str = Form(""),
+    label:                   str = Form(""),
+    model_name:              str = Form(""),
+    context_k:               int = Form(0),
+    max_rounds:              int = Form(0),
+    tools:                   int = Form(1),
+    tags:                    str = Form(""),
+    reasoning_budget_tokens: int = Form(0),
+    host_id:                 str = Form(""),
+    account_id:              str = Form(""),
+    credential_id:           str = Form("cli"),
+) -> JSONResponse:
+    """AJAX: edit a model entry. Returns JSON {ok, label, model_name} on success."""
+    username = _get_user(request)
+    if not username:
+        return JSONResponse({"error": "Not authenticated"}, status_code=401)
+    if not model_name.strip():
+        return JSONResponse({"error": "Model name is required."}, status_code=400)
+    tag_list          = [t.strip() for t in tags.split(",") if t.strip()]
+    max_rounds_       = max_rounds or None
+    tools_bool        = tools != 0
+    reasoning_budget_ = reasoning_budget_tokens or None
+    if mtype == "local_openai":
+        if not host_id.strip():
+            return JSONResponse({"error": "Select a host for this model."}, status_code=400)
+        reg.save_model(username, model_id, host_id, label, model_name, context_k, tag_list,
+                       max_rounds=max_rounds_, tools=tools_bool,
+                       reasoning_budget_tokens=reasoning_budget_)
+    elif mtype == "gemini_api":
+        reg.save_cloud_model(username, model_id, "google", model_name, label,
+                             account_id=account_id or None, context_k=context_k, tags=tag_list,
+                             max_rounds=max_rounds_, tools=tools_bool)
+    elif mtype in ("claude_cli", "anthropic_api"):
+        reg.save_cloud_model(username, model_id, "anthropic", model_name, label,
+                             credential_id=credential_id or "cli", context_k=context_k, tags=tag_list,
+                             max_rounds=max_rounds_, tools=tools_bool)
+    else:
+        return JSONResponse({"error": f"Unknown model type: {mtype}"}, status_code=400)
+    display = label.strip() or model_name.strip()
+    logger.info("model edited (ajax): %s / %s (%s)", username, display, mtype)
+    return JSONResponse({"ok": True, "label": display, "model_name": model_name.strip()})
+
+
@router.post("/api/models/role")
 async def set_role(request: Request) -> JSONResponse:
    """AJAX: assign a model to a role priority slot.
--- a/cortex/static/HELP.md
+++ b/cortex/static/HELP.md
@@ -6,7 +6,7 @@
     and are appended automatically by help.html when present.
 -->

-*Last updated: 2026-05-13*
+*Last updated: 2026-06-18* <!-- input toolbar refactor; XL size added; help doc sync -->

 ---

@@ -44,7 +44,7 @@ The **Context & Memory** panel (sliders icon with tier number) contains all conf
 | **Memory Layers** | Toggle Long / Mid / Short memory on/off |
 | **Distill Memory** | Manually trigger Short / Mid / Long / All distillation |
 | **Model** | Active chat model — click to cycle through your configured slot models (Primary → Backup 1 → …) |
-| **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L** cycles input area height · **⌃↵** toggles send shortcut |
+| **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L/XL** cycles input area height · **⌃↵** toggles send shortcut |

 All settings persist in `localStorage` across page refreshes.

@@ -74,7 +74,7 @@ The orchestrator runs a multi-step tool loop:
 3. The model produces the final user-facing reply — when the orchestrator role uses Gemini, Claude writes the final response; when it uses a local model, that same model writes it
 4. Expandable tool-call cards appear above the response — click any card to see the arguments sent and the result returned

-The ⚡ toggle is **independent of the Role selector** — you can use any role (chat, coder, research, etc.) with or without tools. The orchestrator model is configured in **Account → Model Registry → Role Assignments → Orchestrator**.
+The ⚡ toggle routes requests through the **Orchestrator** role model regardless of which chat model is active. Configure the orchestrator model in **Account → Model Registry → Role Assignments → Orchestrator**.

 Tools mode is best for tasks requiring research, multi-step reasoning, or side effects (e.g. "search for X", "add a task", "what's on my list?", "append this to my journal"). Regular chat is faster for conversational turns.

@@ -156,7 +156,7 @@ Once installed, opening Cortex from the home screen or app launcher skips the br

 ## Switching Models

-The **Model** button in the Context & Memory panel cycles through the slot models configured for your active role (Primary → Backup 1). Click it to switch between models mid-session.
+The **Model** button in the Context & Memory panel cycles through the slot models configured for your **Chat** role (Primary → Backup 1). Click it to switch between models mid-session.

 - The button label shows the active model (e.g. "GPT-4o", "Gemini 2.5 Flash")
 - The selected slot is sent with each chat request so the correct model is used
@@ -205,12 +205,11 @@ The table shows all-time totals per model key, with columns for:

 Values ≥ 1,000 are displayed as `k` (e.g. `24.3k`).

-**What is and isn't tracked:**
+**What is tracked:**

- ✅ Gemini API calls (orchestrator, distillation)
+- ✅ Anthropic API calls (direct SDK)
 - ✅ Local OpenAI-compatible calls (Open WebUI, Ollama, OpenRouter)
- ✗ Claude CLI — no structured token data is returned by the subprocess
- ✗ Gemini CLI — same reason
+- ✅ Gemini API calls (orchestrator, distillation)

 The raw data lives in `home/{username}/usage.json` and is also accessible via the Files panel or the API.

@@ -230,9 +229,10 @@ Configure which AI models are available and which handles each task type.

 Do this before adding models — models need a provider account or local host to attach to.

-**Anthropic (Claude):** Two options:
- **CLI (OAuth):** Nothing to configure — uses your existing `claude auth login` session. If Claude isn't working, run `claude auth login` in a terminal.
- **Direct API key:** Scroll to **Cloud Providers → Anthropic** → click **+ Add API key**. Enter a label and your `sk-ant-…` key from [console.anthropic.com/keys](https://console.anthropic.com/keys). When you add a model using an API key credential, it routes through the Anthropic SDK instead of the CLI.
+**Anthropic (Claude):** Uses a direct API key — no Claude CLI required:
+- Scroll to **Cloud Providers → Anthropic** → click **+ Add API key**
+- Enter a label and your `sk-ant-…` key from [console.anthropic.com/keys](https://console.anthropic.com/keys)
+- Models added with this credential call the Anthropic API directly via the SDK

 **Google (Gemini):** Add one entry per API key you want to use:
 1. Scroll to **Cloud Providers → Google** → click **+ Add Google account**
@@ -261,7 +261,7 @@ Scroll to **Add Model**. Select the provider tab, fill in the details, click **A
 |---|---|
 | **Local** | Select a host (from Step 1) → enter model name, or use **Fetch from host** to pick from a live list |
 | **Google** | Select a Gemini model from the catalog → select a Google account (from Step 1) |
-| **Anthropic** | Select a credential (CLI OAuth or an API key added in Step 1) → select a Claude model from the catalog |
+| **Anthropic** | Select an API key credential (from Step 1) → select a Claude model from the catalog |

 The label and context window size auto-fill from the catalog — edit them if you want. Tags are optional.

@@ -286,7 +286,7 @@ Scroll to **Role Assignments** at the bottom of the page. Each role has **Primar
 | **Coder** | Code-focused tasks — larger context window, code-aware model |
 | **Research** | Long-context research — high-token model, web tools prioritized |

-Switch roles via the **Role** selector in the Context & Memory panel (⚙). Leave all slots empty to use the server default.
+Leave all slots empty to use the server default.

 **Per-role tool sets:** Expand any role card to configure which tool categories the orchestrator can use when that role is active. Unchecked categories are hidden from the model entirely — reducing token overhead on every orchestrated call. Leaving all categories unchecked means all tools the user has access to are available (the default).

@@ -390,6 +390,7 @@ Distillation builds up the memory layers from raw session logs. Runs automatical
 | **mid** | LLM summarizes `MEMORY_SHORT.md` → `MEMORY_MID.md` |
 | **long** | LLM integrates `MEMORY_MID.md` → `MEMORY_LONG.md` |
 | **all** | Runs short → mid → long in sequence |
+| **Rebuild** | ⚠ Wipes Mid + Long memories and rebuilds from session logs. Use to recover from distillation drift. Hand-edited content will be replaced. |

 **Recommended workflow:** run **short** after any productive session; **mid** weekly; **long** monthly.

@@ -462,8 +463,7 @@ For direct access or scripting:
 | Method | Endpoint | Description |
 |---|---|---|
 | `POST` | `/chat` | Send a message — returns SSE stream |
-| `GET` | `/backend` | Get current primary/fallback backends |
-| `POST` | `/backend` | Set primary backend (`{"primary": "claude"}`) |
+| `GET` | `/backend` | Get configured model slots and orchestrator |
 | `GET` | `/sessions` | List all sessions |
 | `GET` | `/history/{id}` | Get session message history |
 | `PUT` | `/history/{id}` | Replace full session history |
--- a/cortex/static/app.js
+++ b/cortex/static/app.js
@@ -140,15 +140,16 @@
        });

        // ── Textarea height ──────────────────────────────────────────
-        const HEIGHT_SIZES  = [120, 240, 480];
-        const HEIGHT_LABELS = ['S', 'M', 'L'];
+        const HEIGHT_SIZES  = [120, 240, 480, 720];
+        const HEIGHT_LABELS = ['S', 'M', 'L', 'XL'];
        const HEIGHT_TITLES = [
            'Input size: Compact — click to cycle',
            'Input size: Medium — click to cycle',
            'Input size: Large — click to cycle',
+            'Input size: Extra Large — click to cycle',
        ];

-        let maxHeight = parseInt(localStorage.getItem('maxHeight') || '120');
+        let maxHeight = parseInt(localStorage.getItem('maxHeight') || '240');
        const heightCycleBtn = document.getElementById('height-cycle-btn');

        function syncHeight() {
--- a/cortex/static/index.html
+++ b/cortex/static/index.html
@@ -115,9 +115,9 @@
                <div id="ctx-schedule"></div>
            </div>
            <div class="ctx-section">
-                <div class="ctx-section-title">Role</div>
+                <div class="ctx-section-title">Model</div>
                <div class="ctx-row">
-                    <button id="backend-toggle" class="ctx-btn" title="Active role — click to cycle">chat</button>
+                    <button id="backend-toggle" class="ctx-btn" title="Active model — click to cycle chat role slots">chat</button>
                </div>
                <div id="backend-model-hint"></div>
            </div>
@@ -167,24 +167,6 @@
    <div id="messages"></div>

    <div id="input-area">
-        <!-- Mode select — compact dropdown, opens upward, MRU sorted -->
-        <div id="mode-select">
-            <button id="mode-select-btn" title="Input mode">
-                <span id="mode-icon">💬</span>
-                <span id="mode-label">Chat</span>
-                <span class="mode-arrow">▲</span>
-            </button>
-            <!-- Populated dynamically in MRU order -->
-            <div id="mode-dropdown"></div>
-            <!-- Note visibility sub-toggle — only shown when note mode is active -->
-            <button id="note-vis-btn" title="Toggle note visibility (private / public)">prv</button>
-            <!-- Tools toggle — routes through the orchestrator tool loop when active -->
-            <button id="tools-toggle" title="Tools disabled — click to enable">⚡</button>
-            <!-- Attach file — images (vision) or text/code files -->
-            <button id="attach-btn" title="Attach image or text file">📎</button>
-            <input type="file" id="file-input" style="display:none"
-                   accept="image/png,image/jpeg,image/webp,image/gif,text/plain,text/markdown,.md,.txt,.py,.js,.ts,.jsx,.tsx,.json,.yaml,.yml,.toml,.html,.css,.sh,.csv,.xml,.rs,.go,.java,.c,.cpp,.h,.rb,.php,.swift,.kt,.sql">
-        </div>
        <!-- Attachment preview — shown when a file is pending -->
        <div id="attachment-row" style="display:none">
            <div id="attachment-preview">
@@ -195,7 +177,26 @@
            </div>
        </div>
        <textarea id="input" rows="1" placeholder="Message…" autofocus></textarea>
-        <div id="send-col">
+        <!-- Compact toolbar: mode, tools, attach | spacer | send/stop -->
+        <div id="input-toolbar">
+            <div id="mode-select">
+                <button id="mode-select-btn" title="Input mode">
+                    <span id="mode-icon">💬</span>
+                    <span id="mode-label">Chat</span>
+                    <span class="mode-arrow">▲</span>
+                </button>
+                <!-- Populated dynamically in MRU order -->
+                <div id="mode-dropdown"></div>
+            </div>
+            <!-- Note visibility sub-toggle — only shown when note mode is active -->
+            <button id="note-vis-btn" title="Toggle note visibility (private / public)">prv</button>
+            <!-- Tools toggle — routes through the orchestrator tool loop when active -->
+            <button id="tools-toggle" title="Tools disabled — click to enable">⚡</button>
+            <!-- Attach file — images (vision) or text/code files -->
+            <button id="attach-btn" title="Attach image or text file">📎</button>
+            <input type="file" id="file-input" style="display:none"
+                   accept="image/png,image/jpeg,image/webp,image/gif,text/plain,text/markdown,.md,.txt,.py,.js,.ts,.jsx,.tsx,.json,.yaml,.yml,.toml,.html,.css,.sh,.csv,.xml,.rs,.go,.java,.c,.cpp,.h,.rb,.php,.swift,.kt,.sql">
+            <div style="flex:1"></div>
            <button id="send">Send</button>
            <button id="stop"><svg data-lucide="square" width="14" height="14" class="btn-icon"></svg> Stop</button>
        </div>
--- a/cortex/static/local_llm.html
+++ b/cortex/static/local_llm.html
@@ -982,6 +982,42 @@
      });
    });

+    // ── Model edit: AJAX save (stay on Models tab) ────────────────────────────
+    // Each edit form is POSTed with fetch(); on success the row is patched in place (no reload).
+    document.querySelectorAll('.model-edit-form').forEach(form => {
+      form.addEventListener('submit', async e => {
+        e.preventDefault();
+        // Strip the "edit-form-" prefix to recover the model id used in the URL and the row id
+        const id      = form.id.replace('edit-form-', '');
+        const saveBtn = form.querySelector('button[type="submit"]');
+        if (saveBtn) saveBtn.disabled = true;
+        try {
+          const res  = await fetch(`/api/models/${id}/edit`, {method: 'POST', body: new FormData(form)});
+          // A non-JSON error body (e.g. an HTML 500 page) becomes a readable failure, not a raw parse error
+          const data = await res.json().catch(() => ({ok: false, error: `Save failed (HTTP ${res.status})`}));
+          if (data.ok) {
+            // Update the row header label and model name in place; skip cells that are absent or unchanged
+            const row = document.getElementById('model-' + id);
+            for (const [sel, text] of [['.model-label', data.label], ['.model-name', data.model_name]]) {
+              const el = row && text ? row.querySelector(sel) : null;
+              if (el) el.textContent = text;
+            }
+            // Close the edit panel; a missing toggle button must not turn a successful save into an error toast
+            form.style.display = 'none';
+            const editBtn = document.querySelector(`.model-edit-btn[data-id="${id}"]`);
+            if (editBtn) editBtn.textContent = 'Edit';
+            showToast('Model saved');
+          } else {
+            showToast(data.error || 'Save failed', true);
+          }
+        } catch (err) {
+          showToast(err.message, true);
+        } finally {
+          if (saveBtn) saveBtn.disabled = false;
+        }
+      });
+    });
+
    // ── Edit form: fetch from host ────────────────────────────────────────────
    document.querySelectorAll('.edit-fetch-btn').forEach(btn => {
      btn.addEventListener('click', async () => {
--- a/cortex/static/style.css
+++ b/cortex/static/style.css
@@ -735,35 +735,28 @@
        .message.note-private .note-content { color: #c9a84c; white-space: pre-wrap; }
        .message.note-public  .note-content { color: #4abfb0; white-space: pre-wrap; }

-        /* ── Input area — 3-col: [mode-toggle] [textarea] [send-col] ── */
+        /* ── Input area — column: [attachment?] [textarea] [toolbar] ── */
        #input-area {
-            padding: 12px 20px;
+            padding: 10px 20px 12px;
            background: var(--surface);
            border-top: 1px solid var(--border);
            display: flex;
-            flex-direction: row;
-            gap: 10px;
-            align-items: flex-end;
+            flex-direction: column;
+            gap: 6px;
        }

-        /* ── Mode select — compact dropdown ─────────────────────────── */
+        /* ── Compact toolbar below the textarea ─────────────────────── */
+        #input-toolbar {
+            display: flex;
+            flex-direction: row;
+            align-items: center;
+            gap: 6px;
+        }
+
+        /* ── Mode select — positioned container for dropdown only ────── */
        #mode-select {
            position: relative;
            flex-shrink: 0;
-            display: flex;
-            flex-direction: column;
-            align-items: stretch;
-            gap: 4px;
-        }
-
-        /* S: collapse to a single row — mode button + compact tools toggle */
-        #mode-select[data-size="s"] {
-            flex-direction: row;
-            align-items: center;
-        }
-        #mode-select[data-size="s"] #tools-toggle {
-            padding: 3px 7px;
-            font-size: 0.75rem;
        }

        #mode-select-btn {
@@ -874,8 +867,7 @@
        #attach-btn:hover { color: rgba(255,255,255,0.6); border-color: rgba(255,255,255,0.25); }

        #attachment-row {
-            padding: 0.3rem 0.5rem;
-            border-bottom: 1px solid var(--border);
+            padding: 0.2rem 0;
        }
        #attachment-preview {
            display: inline-flex;
@@ -914,7 +906,8 @@
        #attachment-clear:hover { color: var(--text); }

        #input {
-            flex: 1;
+            width: 100%;
+            box-sizing: border-box;
            background: var(--bg);
            border: 1px solid var(--border);
            border-radius: 8px;
@@ -936,16 +929,7 @@
        #input.mode-note.public:focus { border-color: rgba(40,170,150,0.85); }
        #input.mode-otr { border-color: rgba(120,80,160,0.4); background: rgba(120,80,160,0.04); }

-        /* Send column — right side, stacked */
-        #send-col {
-            display: flex;
-            flex-direction: column;
-            align-items: stretch;
-            gap: 4px;
-            flex-shrink: 0;
-        }
-
-        /* Send button */
+        /* Send button — sits in #input-toolbar row */
        #send {
            display: flex;
            align-items: center;
@@ -955,11 +939,12 @@
            border: 1px solid var(--user-border);
            color: var(--text);
            border-radius: 8px;
-            padding: 10px 14px;
+            padding: 7px 16px;
            cursor: pointer;
            font-size: 0.9rem;
            text-align: center;
            white-space: nowrap;
+            flex-shrink: 0;
            transition: background 0.15s;
        }

@@ -977,10 +962,11 @@
            border: 1px solid var(--error-border);
            color: var(--error-text);
            border-radius: 8px;
-            padding: 10px 14px;
+            padding: 7px 14px;
            cursor: pointer;
            font-size: 0.9rem;
            text-align: center;
+            flex-shrink: 0;
            transition: background 0.15s;
        }

--- a/cortex/tools/_projects.py
+++ b/cortex/tools/_projects.py
@@ -0,0 +1,31 @@
+"""Shared project alias registry for Cortex tools."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class ProjectDef:
+    path: str       # project directory; the git/aider tools expand ~ with os.path.expanduser in the Cortex process, even when ssh_host is set — NOTE(review): confirm the remote home matches
+    ssh_host: str = ""  # if set, git/aider commands run via SSH on this host; "" means run locally
+
+
+_CORTEX_ROOT_STR: str = str(Path(__file__).parent.parent.parent.resolve())  # three directory levels above this file, as an absolute path
+
+PROJECT_ALIASES: dict[str, ProjectDef] = {  # alias name -> ProjectDef; imported by the git and aider tools
+    "cortex": ProjectDef(path=_CORTEX_ROOT_STR),  # no ssh_host, so commands run locally
+    "aether_api": ProjectDef(
+        path="~/OSIT_dev/aether_api_fastapi",
+        ssh_host="scott-wks-main-i7",
+    ),
+    "aether_frontend": ProjectDef(
+        path="~/OSIT_dev/aether_app_sveltekit",
+        ssh_host="scott-wks-main-i7",
+    ),
+    "aether_container": ProjectDef(
+        path="~/OSIT_dev/aether_container_env",
+        ssh_host="scott-wks-main-i7",
+    ),
+}
--- a/cortex/tools/aider.py
+++ b/cortex/tools/aider.py
@@ -16,25 +16,16 @@ background=True runs the subprocess asynchronously and returns an agent_id immed
 import asyncio
 import logging
 import os
+import shlex
 from pathlib import Path

 from google.genai import types

 import agent_manager
+from ._projects import PROJECT_ALIASES

 logger = logging.getLogger(__name__)

-_CORTEX_DIR = Path(__file__).parent      # .../Cortex_and_Inara_dev/cortex/
-_PROJECT_ROOT = _CORTEX_DIR.parent      # .../Cortex_and_Inara_dev/
-
-# Known project aliases — expand before passing to subprocess
-_PROJECT_ALIASES: dict[str, str] = {
-    "cortex":           str(_PROJECT_ROOT),
-    "aether_api":       "~/OSIT_dev/aether_api_fastapi",
-    "aether_frontend":  "~/OSIT_dev/aether_app_sveltekit",
-    "aether_container": "~/OSIT_dev/aether_container_env",
-}
-
 _MAX_OUTPUT_CHARS = 12_000

 # Maps URL fragments → Aider --api-key provider slug.
@@ -192,11 +183,16 @@ async def aider_run(
    immediately. Use agent_status(agent_id) to check progress; set notify=True to
    receive a push/Talk notification on completion.
    """
-    resolved = _PROJECT_ALIASES.get(project, project)
-    cwd = Path(os.path.expanduser(resolved))
+    proj_def = PROJECT_ALIASES.get(project)
+    if proj_def is not None:
+        cwd = Path(os.path.expanduser(proj_def.path))
+        ssh_host = proj_def.ssh_host
+    else:
+        cwd = Path(os.path.expanduser(project))
+        ssh_host = ""

-    if not cwd.is_dir():
-        return f"Error: project directory '{resolved}' does not exist."
+    if not ssh_host and not cwd.is_dir():
+        return f"Error: project directory '{cwd}' does not exist."

    timeout = min(max(int(timeout), 10), 600)

@@ -232,11 +228,22 @@ async def aider_run(
        cmd += ["--file", f]

    logger.info(
-        "aider_run: project=%s model=%s host_label=%s auto_commit=%s background=%s task=%.120s",
-        project, model, host_label, auto_commit, background, task,
+        "aider_run: project=%s ssh_host=%s model=%s host_label=%s auto_commit=%s background=%s task=%.120s",
+        project, ssh_host or "local", model, host_label, auto_commit, background, task,
    )

    async def _run() -> str:
+        if ssh_host:
+            # Run aider natively on the remote host via a login shell so PATH
+            # includes ~/.local/bin where aider is typically installed.
+            inner_cmd = "cd " + shlex.quote(str(cwd)) + " && " + shlex.join(cmd)
+            ssh_cmd = f"bash -l -c {shlex.quote(inner_cmd)}"
+            proc = await asyncio.create_subprocess_exec(
+                "ssh", ssh_host, ssh_cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+        else:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                cwd=str(cwd),
@@ -323,6 +330,8 @@ DECLARATIONS = [
            "Credentials are resolved automatically from the Cortex model registry — "
            "OpenRouter, local Open WebUI/Ollama, Anthropic API, and other configured hosts "
            "are all supported. Use host_label to pick a specific host. "
+            "aether_api, aether_frontend, and aether_container run aider natively on the "
+            "workstation (scott-wks-main-i7) via SSH — aider must be installed there. "
            "Set background=True for long tasks — returns an agent_id immediately and sends "
            "a notification when done. ADMIN ONLY. Requires confirmation."
        ),
--- a/cortex/tools/git.py
+++ b/cortex/tools/git.py
@@ -13,26 +13,23 @@ Write operations (admin-only, confirm-required):
 All tools accept an optional `project` parameter using the same aliases as aider_run:
  "cortex" (default), "aether_api", "aether_frontend", "aether_container"
 Or pass an absolute path directly.
+
+Projects with an ssh_host defined in _projects.py run all git commands on the remote
+host via SSH, using shlex-quoted commands to handle paths and arguments safely.
 """

 import asyncio
 import logging
 import os
+import shlex
 from pathlib import Path

 from google.genai import types

+from ._projects import PROJECT_ALIASES
+
 logger = logging.getLogger(__name__)

-_CORTEX_ROOT: Path = Path(__file__).parent.parent.parent.resolve()
-
-_PROJECT_ALIASES: dict[str, str] = {
-    "cortex":           str(_CORTEX_ROOT),
-    "aether_api":       "~/OSIT_dev/aether_api_fastapi",
-    "aether_frontend":  "~/OSIT_dev/aether_app_sveltekit",
-    "aether_container": "~/OSIT_dev/aether_container_env",
-}
-
 _MAX_OUTPUT = 50_000

 _PROJECT_PARAM = types.Schema(
@@ -45,16 +42,29 @@ _PROJECT_PARAM = types.Schema(
 )


-def _resolve_project(project: str) -> Path:
-    """Resolve a project alias or path string to an absolute Path."""
+def _resolve_project(project: str) -> tuple[Path, str]:
+    """Map a project alias or raw path to (path, ssh_host); path may not exist locally when ssh_host is set."""
     if not project:
-        return _CORTEX_ROOT
-    resolved = _PROJECT_ALIASES.get(project, project)
-    return Path(os.path.expanduser(resolved))
+        d = PROJECT_ALIASES["cortex"]  # empty project falls back to the "cortex" alias
+    else:
+        d = PROJECT_ALIASES.get(project)
+        if d is None:
+            # Unknown alias: treat the string as a raw path — no SSH routing
+            return Path(os.path.expanduser(project)), ""
+    return Path(os.path.expanduser(d.path)), d.ssh_host  # NOTE(review): ~ expands against the local home even for SSH projects — confirm it matches the remote home


-async def _git(*args: str, cwd: Path, timeout: int = 15) -> tuple[int, str]:
-    """Run a git command in cwd. Returns (returncode, combined output)."""
+async def _git(*args: str, cwd: Path, ssh_host: str = "", timeout: int = 15) -> tuple[int, str]:
+    """Run a git command locally or via SSH. Returns (returncode, combined output)."""
+    if ssh_host:
+        # Build a single shell-safe command string for the remote shell
+        remote_cmd = shlex.join(["git", "-C", str(cwd)] + list(args))
+        proc = await asyncio.create_subprocess_exec(
+            "ssh", ssh_host, remote_cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    else:
        proc = await asyncio.create_subprocess_exec(
            "git", "-C", str(cwd), *args,
            stdout=asyncio.subprocess.PIPE,
@@ -80,10 +90,10 @@ def _cap(text: str) -> str:

 async def git_status(project: str = "") -> str:
    """Return the working tree status for a project."""
-    cwd = _resolve_project(project)
-    if not cwd.is_dir():
+    cwd, ssh_host = _resolve_project(project)
+    if not ssh_host and not cwd.is_dir():
        return f"Error: project directory not found: {cwd}"
-    rc, out = await _git("status", cwd=cwd)
+    rc, out = await _git("status", cwd=cwd, ssh_host=ssh_host)
    if rc != 0:
        return f"git status failed: {out}"
    return out or "Working tree clean — nothing to report."
@@ -91,8 +101,8 @@ async def git_status(project: str = "") -> str:

 async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: str = "") -> str:
    """Return recent commit history for a project."""
-    cwd = _resolve_project(project)
-    if not cwd.is_dir():
+    cwd, ssh_host = _resolve_project(project)
+    if not ssh_host and not cwd.is_dir():
        return f"Error: project directory not found: {cwd}"
    args = ["log"]
    if oneline:
@@ -102,7 +112,7 @@ async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: st
    args += [f"-{max(1, min(n, 200))}"]
    if path:
        args += ["--", path]
-    rc, out = await _git(*args, cwd=cwd)
+    rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host)
    if rc != 0:
        return f"git log failed: {out}"
    return _cap(out) or "No commits found."
@@ -110,8 +120,8 @@ async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: st

 async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only: bool = False, project: str = "") -> str:
    """Show a diff for a project. Defaults to working tree vs HEAD."""
-    cwd = _resolve_project(project)
-    if not cwd.is_dir():
+    cwd, ssh_host = _resolve_project(project)
+    if not ssh_host and not cwd.is_dir():
        return f"Error: project directory not found: {cwd}"
    args = ["diff"]
    if stat_only:
@@ -122,7 +132,7 @@ async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only:
        args += [ref_a]
    if path:
        args += ["--", path]
-    rc, out = await _git(*args, cwd=cwd)
+    rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host)
    # diff exits 1 when differences exist — normal
    if rc not in (0, 1):
        return f"git diff failed: {out}"
@@ -133,29 +143,27 @@ async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only:

 async def git_commit(message: str, project: str = "", files: list[str] | None = None) -> str:
    """Stage files and create a commit in a project."""
-    cwd = _resolve_project(project)
-    if not cwd.is_dir():
+    cwd, ssh_host = _resolve_project(project)
+    if not ssh_host and not cwd.is_dir():
        return f"Error: project directory not found: {cwd}"
    if not message.strip():
        return "Error: commit message is required."

-    # Stage specified files or all changes
    if files:
        for f in files:
-            rc, out = await _git("add", "--", f, cwd=cwd)
+            rc, out = await _git("add", "--", f, cwd=cwd, ssh_host=ssh_host)
            if rc != 0:
                return f"git add '{f}' failed: {out}"
    else:
-        rc, out = await _git("add", "-A", cwd=cwd)
+        rc, out = await _git("add", "-A", cwd=cwd, ssh_host=ssh_host)
        if rc != 0:
            return f"git add -A failed: {out}"

-    # Check that something is actually staged
-    rc, staged = await _git("diff", "--cached", "--stat", cwd=cwd)
+    rc, staged = await _git("diff", "--cached", "--stat", cwd=cwd, ssh_host=ssh_host)
    if not staged.strip():
        return "Nothing staged to commit — working tree already clean."

-    rc, out = await _git("commit", "-m", message, cwd=cwd)
+    rc, out = await _git("commit", "-m", message, cwd=cwd, ssh_host=ssh_host)
    if rc != 0:
        return f"git commit failed: {out}"
    return out or "Committed successfully."
@@ -163,15 +171,15 @@ async def git_commit(message: str, project: str = "", files: list[str] | None =

 async def git_push(project: str = "", remote: str = "origin", branch: str = "") -> str:
    """Push the current branch to a remote."""
-    cwd = _resolve_project(project)
-    if not cwd.is_dir():
+    cwd, ssh_host = _resolve_project(project)
+    if not ssh_host and not cwd.is_dir():
        return f"Error: project directory not found: {cwd}"

    args = ["push", remote]
    if branch:
        args.append(branch)

-    rc, out = await _git(*args, cwd=cwd, timeout=30)
+    rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host, timeout=30)
    if rc != 0:
        return f"git push failed: {out}"
    return out or f"Pushed to {remote} successfully."
@@ -185,7 +193,8 @@ DECLARATIONS = [
        description=(
            "Show the working tree status for a project: staged changes, unstaged "
            "modifications, and untracked files. Use before committing to see what "
-            "will be included. Defaults to the Cortex project."
+            "will be included. Defaults to the Cortex project. "
+            "aether_api, aether_frontend, and aether_container run on the workstation via SSH."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
@@ -197,7 +206,8 @@ DECLARATIONS = [
        description=(
            "Show recent commit history for a project. Returns commit hashes, dates, "
            "and messages. Use after aider_run completes to see what was committed. "
-            "Defaults to the Cortex project."
+            "Defaults to the Cortex project. "
+            "aether_api, aether_frontend, and aether_container run on the workstation via SSH."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
@@ -226,7 +236,8 @@ DECLARATIONS = [
            "With ref_a only: changes between that ref and HEAD. "
            "With ref_a and ref_b: changes between the two refs. "
            "Use after aider_run (auto_commit=False) to review changes before committing. "
-            "Defaults to the Cortex project."
+            "Defaults to the Cortex project. "
+            "aether_api, aether_frontend, and aether_container run on the workstation via SSH."
        ),
        parameters=types.Schema(
            type=types.Type.OBJECT,
@@ -257,6 +268,7 @@ DECLARATIONS = [
            "Stage files and create a git commit in a project. "
            "Use after reviewing changes with git_diff — especially when aider_run ran "
            "with auto_commit=False. Stages all changes by default (files=None). "
+            "aether_api, aether_frontend, and aether_container commit on the workstation via SSH. "
            "ADMIN ONLY. Requires confirmation."
        ),
        parameters=types.Schema(
@@ -284,6 +296,7 @@ DECLARATIONS = [
        description=(
            "Push the current branch to a remote. "
            "Use after git_commit or after aider_run commits to share the changes. "
+            "aether_api, aether_frontend, and aether_container push on the workstation via SSH. "
            "ADMIN ONLY. Requires confirmation."
        ),
        parameters=types.Schema(
--- a/documentation/ARCH__BACKENDS.md
+++ b/documentation/ARCH__BACKENDS.md
@@ -1,20 +1,21 @@
 # Architecture: LLM Backends

 > How Cortex selects and talks to AI models.
-> Last updated: 2026-05-06
+> Last updated: 2026-06-18

 ---

 ## Providers

-Cortex supports four model types, each dispatched differently:
+Cortex supports two model types, each dispatched differently:

 | Type | Auth | Use |
 |---|---|---|
-| `claude_cli` | OAuth token from `~/.claude/.credentials.json` | Chat, persona responses |
-| `gemini_cli` | Gemini CLI credentials | Chat fallback / explicit selection |
-| `gemini_api` | API key from registry account or `.env` | Orchestrator tool loop |
-| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, etc. |
+| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, any OpenAI-compatible endpoint |
+| `anthropic_api` | API key in model registry (Anthropic cloud provider) | Claude models via Anthropic SDK |
+
+The Gemini API (`gemini_api`) is a third type used exclusively by the orchestrator engine —
+it is not dispatched through `llm_client.py` and is not available for chat/distill roles.

 ---

@@ -22,40 +23,36 @@ Cortex supports four model types, each dispatched differently:

 ### Default: Role-Based Routing (Auto)

-When no explicit backend is selected, Cortex routes to the model configured for the
-request's **role** in the user's model registry. Roles: `chat`, `orchestrator`, `distill`,
-`coder`, `research` (extensible via `DEFINED_ROLES` in `.env`).
+All routing goes through the user's model registry. When a request arrives, `complete()` in
+`llm_client.py` resolves the model for the given role:

-Resolution order for a role:
-1. User registry: `roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4`
-2. `.env` role default: `ROLE_CHAT=claude_cli`, `ROLE_DISTILL=claude_cli`, etc.
-3. Hardcoded last-resort: `chat/distill/coder → claude_cli`, `orchestrator/research → gemini_api`
-
-### Explicit Override
-
-The **Role** toggle in the Context & Memory panel cycles through configured role slots for the `chat` role: **Primary → Backup 1 → Backup 2 → auto**.
-
- Each slot shows the configured model label
- `auto` uses the Primary without forcing a specific backend type
- The ⚡ Tools toggle is independent — it routes to the `orchestrator` role regardless of the chat role selection
-
-**Fallback chain** (automatic, only when no explicit registry entry exists):
 ```
-claude  → gemini
-gemini  → claude
-local   → claude
+slot specified  →  resolve that exact slot (primary / backup_1 / backup_2)
+no slot         →  get_model_for_role(username, role)
+no registry entry  →  RuntimeError: "No model configured for role '...'"
 ```
-When a model is explicitly configured in the registry, errors surface immediately — no silent fallback.

-Each response shows a model tag (bottom-right of the message bubble) with the model label and host.
+Roles: `chat`, `orchestrator`, `distill`, `janitor`, `coder`, `research` (extensible via
+`DEFINED_ROLES` in `.env`).
+
+There is no implicit fallback to a built-in model. If no model is configured for a role,
+the request fails with a clear error directing the user to `/settings/models`.
+
+### Explicit Slot Selection
+
+The **Model** button in the Context & Memory panel cycles through the slots configured for the `chat` role:
+**Primary → Backup 1 → auto**. Each slot resolves the configured model for that position.
+
+When a model is explicitly configured (via slot or registry entry), errors surface
+immediately — no silent fallback to another backend.

 ---

-## Model Registry — V2 Schema
+## Model Registry Schema

 Per-user configuration stored in `home/{user}/model_registry.json`.

-Managed at **Settings → Models** (`/settings/models`). Full provider UI coming in Phase 2.
+Managed at **Settings → Models** (`/settings/models`).

 ```json
 {
@@ -64,7 +61,7 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
  "providers": {
    "anthropic": {
      "credentials": [
-        {"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}
+        {"id": "key1", "label": "My Anthropic Key", "type": "api_key", "api_key": "sk-ant-..."}
      ]
    },
    "google": {
@@ -77,6 +74,13 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
  "hosts": [
    {
      "id": "abc123",
+      "label": "OpenRouter",
+      "api_url": "https://openrouter.ai/api/v1",
+      "api_key": "sk-or-...",
+      "host_type": "openai"
+    },
+    {
+      "id": "def456",
      "label": "Gaming Laptop",
      "api_url": "http://192.168.x.x:3000",
      "api_key": "",
@@ -87,23 +91,22 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
  "models": [
    {
      "id": "m1",
-      "type": "claude_cli",
-      "label": "Sonnet 4.6 (CLI)",
-      "model_name": "claude-sonnet-4-6",
-      "provider": "anthropic",
-      "credential_id": "cli",
+      "type": "local_openai",
+      "label": "Claude Sonnet 4.6 (OpenRouter)",
+      "model_name": "anthropic/claude-sonnet-4-6",
+      "host_id": "abc123",
      "context_k": 200,
      "tags": ["chat", "persona"]
    },
    {
      "id": "m2",
-      "type": "gemini_api",
-      "label": "Gemini 2.5 Flash (OSIT)",
-      "model_name": "gemini-2.5-flash",
-      "provider": "google",
-      "account_id": "a1b2",
-      "context_k": 1000,
-      "tags": ["orchestrator", "research"]
+      "type": "anthropic_api",
+      "label": "Claude Sonnet 4.6 (Direct)",
+      "model_name": "claude-sonnet-4-6",
+      "provider": "anthropic",
+      "credential_id": "key1",
+      "context_k": 200,
+      "tags": ["chat"]
    },
    {
      "id": "m3",
@@ -111,7 +114,7 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
      "label": "Gemma 4 E4B",
      "model_name": "gemma4:e4b",
      "provider": "local",
-      "host_id": "abc123",
+      "host_id": "def456",
      "context_k": 72,
      "max_rounds": 5,
      "tools": true,
@@ -120,8 +123,8 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
  ],

  "roles": {
-    "chat":         {"primary": "m1", "backup_1": "m2", "backup_2": "m3"},
-    "orchestrator": {"primary": "m2", "backup_1": "m3"},
+    "chat":         {"primary": "m1", "backup_1": "m2"},
+    "orchestrator": {"primary": "m2"},
    "distill":      {"primary": "m1"}
  }
 }
@@ -145,52 +148,9 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
 Set `api_url` to the base path before `/chat/completions`:
 - OpenRouter: `https://openrouter.ai/api/v1`

-### Built-in model IDs
-
-Always resolvable without a user-created registry entry. Used as role defaults.
-
-| ID | Type | Notes |
-|---|---|---|
-| `claude_cli` | `claude_cli` | Model from `DEFAULT_MODEL` in `.env` |
-| `gemini_cli` | `gemini_cli` | Gemini CLI subprocess |
-| `gemini_api` | `gemini_api` | Model from `ORCHESTRATOR_MODEL` in `.env`; key from `GEMINI_API_KEY` |
-
-### V1 → V2 migration
-
-Automatic on first load. Changes:
- Adds `providers` section (Anthropic CLI credential + empty Google accounts)
- Migrates `gemini_api_key` from `auth.json` → `providers.google.accounts[0]`
- All existing hosts, models, and role assignments are preserved
-
 ---

-## Claude Backend (`_claude()`)
-
-Runs `claude --print --no-session-persistence --output-format text` as a subprocess.
-
- System prompt passed via `--system-prompt`
- Conversation history formatted as `<conversation>` block
- Token read live from `~/.claude/.credentials.json` on every call — never uses the
-  env var, which goes stale after `claude auth login`
- Model override via `--model` flag when `model_name` is set in the registry entry
-
-Timeout: `TIMEOUT_CLAUDE=60` seconds (`.env`)
-
---
-
-## Gemini CLI Backend (`_gemini()`)
-
-Runs `gemini --output-format text --extensions "" -p <prompt>` as a subprocess.
-
- `--extensions ""` disables all MCP extensions — prevents child processes keeping pipes open
- `start_new_session=True` puts the process in its own group for clean `os.killpg` on timeout
- Output is cleaned to strip CLI noise (loading messages, retry notices, quota warnings)
-
-Timeout: `TIMEOUT_GEMINI=120` seconds (`.env`)
-
---
-
-## Local Backend (`_local()`)
+## Local/OpenAI-Compatible Backend (`_local()`)

 HTTP POST to an OpenAI-compatible endpoint. Model config is resolved via the model registry.

@@ -199,13 +159,36 @@ HTTP POST to an OpenAI-compatible endpoint. Model config is resolved via the mod
 # host_type "openai":     POST {api_url}/chat/completions
 ```

+System prompt is sent as the first `{"role": "system", "content": "..."}` message.
+Image attachments are injected into the last user message as `image_url` content blocks.
+Token usage is recorded when returned by the endpoint.
+
+Streaming variant: `_local_streaming()` — SSE line-by-line, yields tokens via `token_sink`.
+
 Timeout: `TIMEOUT_LOCAL=300` seconds (`.env`) — local models may need to load from disk.

 ---

-## Gemini API (Orchestrator)
+## Anthropic API Backend (`_anthropic_api()`)

-Used by `orchestrator_engine.py` for the ReAct tool loop. Not used for general chat.
+Direct call to the Anthropic Messages API via the `anthropic` Python SDK.
+
+The system prompt is passed as the top-level `system` field. Messages are stripped to `role`/`content` only.
+Token usage is always recorded from `resp.usage`.
+
+Streaming variant: `_anthropic_api_streaming()` — uses `client.messages.stream()`, yields
+tokens via `token_sink`.
+
+API key comes from the model registry: `providers.anthropic.credentials[n].api_key`.
+
+Timeout: governed by httpx defaults and the Anthropic SDK's own connection handling.
+
+---
+
+## Gemini API (Orchestrator only)
+
+Used by `orchestrator_engine.py` for the ReAct tool loop. Not dispatched through
+`llm_client.py` and not available for chat, distill, or other roles.

 API key resolution order:
 1. `api_key` embedded in the resolved orchestrator model config (V2 registry with `account_id`)
@@ -217,9 +200,7 @@ API key resolution order:
 ## Distillation

 Memory distillation uses `role="distill"`. Configure via Model Registry → Role Assignments.
-
-`.env` override: `ROLE_DISTILL=claude_cli` (default).
-
+Any `local_openai` or `anthropic_api` model can be assigned to the distill role.

 ---

@@ -232,4 +213,4 @@ Memory distillation uses `role="distill"`. Configure via Model Registry → Role
 | `cortex/routers/local_llm.py` | Settings UI routes + `/api/models/role` AJAX |
 | `cortex/routers/chat.py` | `_backend_label()`, `fallback_used` flag |
 | `cortex/routers/orchestrator.py` | Engine selection, Gemini API key resolution |
-| `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `PRIMARY_BACKEND` |
+| `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `TIMEOUT_LOCAL` |