feat: audit log, usage tracking UI, OpenAI orchestrator compaction, onboarding + docs

Tool audit log: - Every orchestrator tool call logged to home/{user}/tool_audit/YYYY-MM-DD.jsonl - Files panel sidebar: audit log group (collapsed), date-linked read-only table - Admin endpoints: /api/audit/files, /api/audit/day, /api/audit/recent, /api/audit/stats - Engine and model name recorded per entry OpenAI orchestrator improvements: - Context budget enforcement: 75% of model context_k (min 16k) - Message compaction: truncates old tool results when approaching budget - max_rounds respected per model config (intersected with server cap) OpenRouter onboarding (setup.html, onboarding.py, app.js, settings.html): - Step 3 of 3: /setup/model with curated model picker - Chat banner for users on server-default model (informational, not alarmist) - Settings quick-link card; /setup/model works standalone for existing users Model registry + session store: - set_role_config / get_role_config for per-role tool lists and system_append - session_store: session rename, session name backfill endpoint UI updates (app.js, index.html, style.css, local_llm.html): - Role toggle in context panel - Off-the-record mode - Agent notes read-only viewer - OPERATIONS.md loaded at T2+ in context Documentation: - HELP.md: full tool table, per-role tool sets, Agent Notes, usage tracking - TOOLS.md: Agent Notes section, count corrected to 44 - ARCH__SYSTEM.md, ARCH__BACKENDS.md, MASTER.md updated to match reality - CLAUDE.md: onboarding flow, documentation philosophy sections - README.md: stack in practice, DeepSeek TUI mention, architecture diagram updated - TODO__Agents.md: onboarding task completed with deviation notes Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 21:26:43 -04:00
parent c02d2462b0
commit f8f7cd75da
25 changed files with 1088 additions and 151 deletions
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -295,6 +295,53 @@ async def rename_session_endpoint(
    return {"ok": True, "session_id": session_id, "name": req.name.strip()}


+@router.post("/api/sessions/backfill-names")
+async def backfill_session_names(
+    request: Request,
+    user: str = Query(""),
+    persona: str = Query(""),
+) -> dict:
+    """Give every unnamed session a name derived from its first user message.
+    Names are cut to 60 chars (plus "…" when cut); sessions that already have a name are left alone.
+    Empty user/persona fall back to the JWT cookie user and the cx_last_persona cookie."""
+    # No explicit user: read it from the auth cookie, or answer 401.
+    if not user:
+        token = request.cookies.get(COOKIE_NAME)
+        if not token:
+            raise HTTPException(status_code=401, detail="Not authenticated")
+        try:
+            user = decode_token(token)
+        except jwt.InvalidTokenError:
+            raise HTTPException(status_code=401, detail="Invalid session")
+
+    # No explicit persona: use the cookie value when it is one of the user's personas, else the first one.
+    if not persona:
+        from persona import list_user_personas
+        remembered = request.cookies.get("cx_last_persona", "")
+        choices = list_user_personas(user)
+        persona = remembered if remembered in choices else (choices[0] if choices else "")
+    if not persona:
+        raise HTTPException(status_code=400, detail="No persona found for user")
+
+    _set_ctx(user, persona)
+    all_sessions = list_all()
+    renamed = 0
+    for entry in all_sessions:
+        if entry.get("name"):
+            continue
+        sid = entry["session_id"]
+        # First message whose role is "user"; None when the session has none.
+        opener = next((msg for msg in load_session(sid) if msg.get("role") == "user"), None)
+        body = (opener.get("content") or "").strip() if opener else ""
+        if not body:
+            continue  # nothing to derive a name from
+        tail = "…" if len(body) > 60 else ""
+        rename_session(sid, body[:60].rstrip() + tail)
+        renamed += 1
+    # "total" counts every listed session, named or not.
+    return {"ok": True, "named": renamed, "total": len(all_sessions)}
+
+
@router.delete("/sessions/{session_id}")
 async def delete_session_endpoint(
    session_id: str,
--- a/cortex/routers/distill.py
+++ b/cortex/routers/distill.py
@@ -1,25 +1,50 @@
 """
 Manual memory distillation endpoints.

-  POST /distill/short  — roll session logs → MEMORY_SHORT.md (no LLM)
-  POST /distill/mid    — summarize short   → MEMORY_MID.md   (LLM)
-  POST /distill/long   — integrate mid     → MEMORY_LONG.md  (LLM)
-  POST /distill/all    — run all three in sequence
+  POST /distill/short    — roll session logs → MEMORY_SHORT.md (no LLM)
+  POST /distill/mid      — summarize short   → MEMORY_MID.md   (LLM)
+  POST /distill/long     — integrate mid     → MEMORY_LONG.md  (LLM)
+  POST /distill/all      — run all three in sequence
+  POST /distill/rebuild  — wipe mid + long, then run all three from scratch

-All endpoints require ?user=<username>&persona=<name> query params so distillation
-targets the correct persona. Without them, the request is rejected (no silent fallback
-to server defaults — that caused wrong-user distillation in a multi-user setup).
+All endpoints require ?user=<username>&persona=<name> query params.
+
+Concurrency: one distillation at a time per persona. A second request while one
+is running returns 409 immediately — no silent queuing.
 """
+import asyncio
+from datetime import datetime, timedelta
 from fastapi import APIRouter, HTTPException, Query
 from memory_distiller import distill_short, distill_mid, distill_long
-from persona import validate as validate_persona, set_context
+from persona import validate as validate_persona, set_context, persona_path as _persona_path
 import scheduler

 router = APIRouter(prefix="/distill")

+# One asyncio.Lock per persona, created on first use by _get_lock. Key: (user, persona)
+_LOCKS: dict[tuple[str, str], asyncio.Lock] = {}
+_LOCKS_META: dict[tuple[str, str], str] = {}  # key → label of the step currently running (shown in the 409 message)
+
+# Minimum time between successive runs of each endpoint, per persona.
+# Prevents accidental rapid-fire runs and token waste. Keyed by endpoint name.
+_COOLDOWNS: dict[str, timedelta] = {
+    "short":   timedelta(minutes=1),
+    "mid":     timedelta(minutes=30),
+    "long":    timedelta(hours=6),
+    "all":     timedelta(hours=1),
+    "rebuild": timedelta(hours=6),
+}
+_LAST_RUN: dict[tuple[str, str, str], datetime] = {}  # key: (user, persona, endpoint) → time stored by _record_run
+
+
+def _get_lock(user: str, persona: str) -> asyncio.Lock:
+    """Return the asyncio.Lock for (user, persona), creating and caching it on first use."""
+    slot = (user, persona)
+    existing = _LOCKS.get(slot)
+    return existing if existing is not None else _LOCKS.setdefault(slot, asyncio.Lock())
+

 def _resolve(user: str, persona: str) -> tuple[str, str]:
-    """Validate and set persona context. Raises 404 if the persona doesn't exist."""
    try:
        u, p = validate_persona(user, persona)
    except Exception:
@@ -28,13 +53,51 @@ def _resolve(user: str, persona: str) -> tuple[str, str]:
    return u, p


+def _check_lock(user: str, persona: str) -> asyncio.Lock:
+    """Hand back this persona's lock when it is free; answer HTTP 409 when it is held."""
+    persona_lock = _get_lock(user, persona)
+    if not persona_lock.locked():
+        return persona_lock
+    # _LOCKS_META names the step in progress for this persona;
+    # a generic word is used when no step has been recorded.
+    running = _LOCKS_META.get((user, persona), "distillation")
+    busy = f"A {running} is already running for {persona} — please wait for it to finish."
+    raise HTTPException(status_code=409, detail=busy)
+
+
+def _check_cooldown(user: str, persona: str, endpoint: str) -> None:
+    """Reject with HTTP 429 when `endpoint` ran for this persona within its cooldown.
+
+    Endpoints without an entry in _COOLDOWNS, and personas with no run
+    recorded in _LAST_RUN, pass straight through."""
+    window = _COOLDOWNS.get(endpoint)
+    previous = _LAST_RUN.get((user, persona, endpoint))
+    if not window or not previous:
+        return
+    left = window - (datetime.now() - previous)
+    if left <= timedelta(0):
+        return
+    mins, secs = (int(part) for part in divmod(left.total_seconds(), 60))
+    wait = f"{mins}m {secs}s" if mins else f"{secs}s"
+    raise HTTPException(
+        status_code=429,
+        detail=f"{endpoint} was just run — please wait {wait} before running again.",
+    )
+
+
+def _record_run(user: str, persona: str, endpoint: str) -> None:
+    _LAST_RUN.update({(user, persona, endpoint): datetime.now()})  # timestamp later read by _check_cooldown
+
+
@router.get("/status")
 async def distill_status() -> dict:
-    """Show auto-distillation schedule and next run times."""
    from config import settings
+    # Include which personas are currently distilling
+    active = [f"{u}/{p}" for (u, p), lock in _LOCKS.items() if lock.locked()]
    return {
        "enabled": settings.auto_distill,
        "jobs": scheduler.status(),
+        "active": active,
        "config": {
            "short": settings.auto_distill_short,
            "mid": settings.auto_distill_mid,
@@ -49,7 +112,16 @@ async def do_distill_short(
    persona: str = Query(...),
 ) -> dict:
    u, p = _resolve(user, persona)
-    return {"ok": True, **distill_short(u, p)}
+    _check_cooldown(u, p, "short")
+    lock = _check_lock(u, p)
+    async with lock:
+        _LOCKS_META[(u, p)] = "short distill"
+        try:
+            result = distill_short(u, p)
+            _record_run(u, p, "short")
+            return {"ok": True, **result}
+        finally:
+            _LOCKS_META.pop((u, p), None)


@router.post("/mid")
@@ -58,8 +130,17 @@ async def do_distill_mid(
    persona: str = Query(...),
 ) -> dict:
    u, p = _resolve(user, persona)
-    result = await distill_mid(u, p)
-    return {"ok": "error" not in result, **result}
+    _check_cooldown(u, p, "mid")
+    lock = _check_lock(u, p)
+    async with lock:
+        _LOCKS_META[(u, p)] = "mid distill"
+        try:
+            result = await distill_mid(u, p)
+            if "error" not in result:
+                _record_run(u, p, "mid")
+            return {"ok": "error" not in result, **result}
+        finally:
+            _LOCKS_META.pop((u, p), None)


@router.post("/long")
@@ -68,8 +149,17 @@ async def do_distill_long(
    persona: str = Query(...),
 ) -> dict:
    u, p = _resolve(user, persona)
-    result = await distill_long(u, p)
-    return {"ok": "error" not in result, **result}
+    _check_cooldown(u, p, "long")
+    lock = _check_lock(u, p)
+    async with lock:
+        _LOCKS_META[(u, p)] = "long distill"
+        try:
+            result = await distill_long(u, p)
+            if "error" not in result:
+                _record_run(u, p, "long")
+            return {"ok": "error" not in result, **result}
+        finally:
+            _LOCKS_META.pop((u, p), None)


@router.post("/all")
@@ -78,14 +168,71 @@ async def do_distill_all(
    persona: str = Query(...),
 ) -> dict:
    u, p = _resolve(user, persona)
-    short_result = distill_short(u, p)
-    mid_result = await distill_mid(u, p)
-    if "error" in mid_result:
-        return {"ok": False, "short": short_result, "mid": mid_result}
-    long_result = await distill_long(u, p)
-    return {
-        "ok": "error" not in long_result,
-        "short": short_result,
-        "mid": mid_result,
-        "long": long_result,
-    }
+    _check_cooldown(u, p, "all")
+    lock = _check_lock(u, p)
+    async with lock:
+        _LOCKS_META[(u, p)] = "full distill"
+        try:
+            short_result = distill_short(u, p)
+            mid_result = await distill_mid(u, p)
+            if "error" in mid_result:
+                return {"ok": False, "short": short_result, "mid": mid_result}
+            long_result = await distill_long(u, p)
+            ok = "error" not in long_result
+            if ok:
+                _record_run(u, p, "all")
+            return {
+                "ok": ok,
+                "short": short_result,
+                "mid": mid_result,
+                "long": long_result,
+            }
+        finally:
+            _LOCKS_META.pop((u, p), None)
+
+
+@router.post("/rebuild")
+async def do_distill_rebuild(
+    user: str = Query(...),
+    persona: str = Query(...),
+) -> dict:
+    """Wipe MEMORY_MID and MEMORY_LONG (with backups), then run short → mid → long.
+
+    Use when memories have drifted or been corrupted; hand-edited content is replaced.
+    Returns the per-step results plus "rebuilt": True; "ok" is False if mid or long
+    reported an error. Raises 429 during the cooldown and 409 while another run is active.
+    """
+    u, p = _resolve(user, persona)
+    _check_cooldown(u, p, "rebuild")
+    lock = _check_lock(u, p)
+    async with lock:
+        _LOCKS_META[(u, p)] = "memory rebuild"
+        try:
+            from memory_distiller import _rotate_backup
+            persona_dir = _persona_path(u, p)
+            stamp = datetime.now().strftime("%Y-%m-%d %H:%M")  # one stamp shared by both cleared files
+            # Back up then wipe mid and long before rebuilding
+            for name in ("MEMORY_MID.md", "MEMORY_LONG.md"):
+                path = persona_dir / name
+                if path.exists():
+                    _rotate_backup(path)
+                    path.write_text(
+                        f"# {name}\n\n*Cleared for rebuild — {stamp}.*\n"
+                    )
+            short_result = distill_short(u, p)
+            mid_result = await distill_mid(u, p)
+            if "error" in mid_result:  # stop before long; no cooldown is recorded for a failed rebuild
+                return {"ok": False, "short": short_result, "mid": mid_result, "rebuilt": True}
+            long_result = await distill_long(u, p)
+            ok = "error" not in long_result
+            if ok:
+                _record_run(u, p, "rebuild")
+            return {
+                "ok": ok,
+                "short": short_result,
+                "mid": mid_result,
+                "long": long_result,
+                "rebuilt": True,
+            }
+        finally:
+            _LOCKS_META.pop((u, p), None)
--- a/cortex/routers/files.py
+++ b/cortex/routers/files.py
@@ -27,10 +27,21 @@ ALLOWED = {
    "MEMORY_SHORT.bak1.md",
    "MEMORY_SHORT.bak2.md",
    "HELP.md",
+    # Agent private notes — backups only; AGENT_NOTES.md itself is agent-only
+    "AGENT_NOTES.bak1.md",
+    "AGENT_NOTES.bak2.md",
+    "AGENT_NOTES.bak3.md",
+}
+
+# Files the panel may read but save_file refuses to write (HTTP 403)
+READ_ONLY = {
+    "AGENT_NOTES.bak1.md",
+    "AGENT_NOTES.bak2.md",
+    "AGENT_NOTES.bak3.md",
 }

 # Files served from home/{user}/ instead of persona path
-USER_FILES = {"email_allowlist.json"}
+USER_FILES = {"email_allowlist.json", "usage.json"}


 def _resolve(user: str, persona: str) -> None:
@@ -92,7 +103,11 @@ async def get_file(
    p = _path(filename, user=user)
    if not p.exists():
        raise HTTPException(status_code=404, detail=f"{filename} does not exist")
-    return {"name": filename, "content": p.read_text()}
+    return {
+        "name": filename,
+        "content": p.read_text(),
+        "readonly": filename in READ_ONLY,
+    }


 class FileWrite(BaseModel):
@@ -106,6 +121,8 @@ async def save_file(
    user: str = Query("scott"),
    persona: str = Query("inara"),
 ) -> dict:
+    if filename in READ_ONLY:
+        raise HTTPException(status_code=403, detail=f"{filename} is read-only.")
    _resolve(user, persona)
    p = _path(filename, user=user)
    p.write_text(req.content)
--- a/cortex/routers/local_llm.py
+++ b/cortex/routers/local_llm.py
@@ -159,7 +159,8 @@ def _render(username: str, success: str = "", error: str = "") -> str:
        else:
            secondary = default_secondary

-        ctx      = f'<span class="ctx-badge">{m.get("context_k",0)}k</span>' if m.get("context_k") else ""
+        ctx       = f'<span class="ctx-badge">{m.get("context_k",0)}k</span>' if m.get("context_k") else ""
+        no_tools  = '' if m.get("tools", True) else '<span class="pbadge pb-notools">no tools</span>'
        tags_html = " ".join(f'<span class="tag">{t}</span>' for t in (m.get("tags") or []))
        sec      = f'<span class="model-host">{secondary}</span>' if secondary else ""

@@ -201,13 +202,15 @@ def _render(username: str, success: str = "", error: str = "") -> str:
        cur_label      = m.get("label", "")
        cur_model_name = m.get("model_name", "")
        cur_ctx        = m.get("context_k", 0) or 0
+        cur_max_rounds = m.get("max_rounds") or 0
+        cur_tools      = m.get("tools", True)
        cur_tags       = ", ".join(m.get("tags") or [])

        model_rows += f'''
        <div class="model-row" id="model-{m["id"]}">
          <div class="model-row-header">
            <div class="model-info">
-              <div>{badge}<span class="model-label">{m.get("label") or m.get("model_name","")}</span>{ctx}</div>
+              <div>{badge}<span class="model-label">{m.get("label") or m.get("model_name","")}</span>{ctx}{no_tools}</div>
              <span class="model-name">{m.get("model_name","")}</span>
              {sec}
              <div class="tag-row">{tags_html}</div>
@@ -239,8 +242,22 @@ def _render(username: str, success: str = "", error: str = "") -> str:
            {extra_fields}
            <div class="field-row">
              <div class="field" style="flex:0 0 auto">
-                <label>Context (k)</label>
-                <input type="number" name="context_k" value="{cur_ctx}" min="0">
+                <label title="Context window size in thousands of tokens. 0 = assume 32k.">Context (k)</label>
+                <input type="number" name="context_k" value="{cur_ctx}" min="0"
+                       title="Context window size in thousands of tokens. 0 = assume 32k (compaction budget ~24k tokens).">
+              </div>
+              <div class="field" style="flex:0 0 auto">
+                <label title="Per-model tool loop cap. 0 = use the global default (orchestrator_max_rounds).">Max rounds</label>
+                <input type="number" name="max_rounds" value="{cur_max_rounds}" min="0"
+                       title="Per-model tool loop cap. 0 = use the global default (orchestrator_max_rounds).">
+              </div>
+              <div class="field" style="flex:0 0 auto">
+                <label title="Whether this model supports tool calling. If not supported, requests skip the tool loop entirely.">Tool calling</label>
+                <select name="tools"
+                        title="Whether this model supports tool calling. If not supported, requests skip the tool loop entirely.">
+                  <option value="1" {'selected' if cur_tools else ''}>Supported</option>
+                  <option value="0" {'' if cur_tools else 'selected'}>Not supported</option>
+                </select>
              </div>
              <div class="field">
                <label>Tags</label>
@@ -426,6 +443,8 @@ async def add_model(
    provider:         str = Form("local"),
    label:            str = Form(""),
    context_k:        int = Form(0),
+    max_rounds:       int = Form(0),
+    tools:            int = Form(1),
    tags:             str = Form(""),
    # local-only fields
    host_id:          str = Form(""),
@@ -439,14 +458,17 @@ async def add_model(
    if not username:
        return RedirectResponse("/login", status_code=302)

-    tag_list = [t.strip() for t in tags.split(",") if t.strip()]
+    tag_list   = [t.strip() for t in tags.split(",") if t.strip()]
+    max_rounds_ = max_rounds or None
+    tools_bool  = tools != 0

    if provider == "local":
        if not model_name.strip():
            return HTMLResponse(_render(username, error="Model name is required."))
        if not host_id.strip():
            return HTMLResponse(_render(username, error="Select a host."))
-        reg.save_model(username, None, host_id, label, model_name, context_k, tag_list)
+        reg.save_model(username, None, host_id, label, model_name, context_k, tag_list,
+                       max_rounds=max_rounds_, tools=tools_bool)
        display = label or model_name

    elif provider in ("google", "anthropic"):
@@ -459,6 +481,7 @@ async def add_model(
            account_id=account_id or None,
            credential_id=credential_id or None,
            context_k=context_k, tags=tag_list,
+            max_rounds=max_rounds_, tools=tools_bool,
        )
        display = label or cloud_model_name
    else:
@@ -476,6 +499,8 @@ async def edit_model(
    label:         str = Form(""),
    model_name:    str = Form(""),
    context_k:     int = Form(0),
+    max_rounds:    int = Form(0),
+    tools:         int = Form(1),
    tags:          str = Form(""),
    host_id:       str = Form(""),
    account_id:    str = Form(""),
@@ -486,17 +511,22 @@ async def edit_model(
        return RedirectResponse("/login", status_code=302)
    if not model_name.strip():
        return HTMLResponse(_render(username, error="Model name is required."))
-    tag_list = [t.strip() for t in tags.split(",") if t.strip()]
+    tag_list    = [t.strip() for t in tags.split(",") if t.strip()]
+    max_rounds_ = max_rounds or None
+    tools_bool  = tools != 0
    if mtype == "local_openai":
        if not host_id.strip():
            return HTMLResponse(_render(username, error="Select a host for this model."))
-        reg.save_model(username, model_id, host_id, label, model_name, context_k, tag_list)
+        reg.save_model(username, model_id, host_id, label, model_name, context_k, tag_list,
+                       max_rounds=max_rounds_, tools=tools_bool)
    elif mtype == "gemini_api":
        reg.save_cloud_model(username, model_id, "google", model_name, label,
-                             account_id=account_id or None, context_k=context_k, tags=tag_list)
+                             account_id=account_id or None, context_k=context_k, tags=tag_list,
+                             max_rounds=max_rounds_, tools=tools_bool)
    elif mtype == "claude_cli":
        reg.save_cloud_model(username, model_id, "anthropic", model_name, label,
-                             credential_id=credential_id or "cli", context_k=context_k, tags=tag_list)
+                             credential_id=credential_id or "cli", context_k=context_k, tags=tag_list,
+                             max_rounds=max_rounds_, tools=tools_bool)
    else:
        return HTMLResponse(_render(username, error=f"Unknown model type: {mtype}"))
    display = label.strip() or model_name.strip()
--- a/cortex/routers/onboarding.py
+++ b/cortex/routers/onboarding.py
@@ -1,11 +1,13 @@
 """
-Onboarding router — invite-based setup + persona creation.
+Onboarding router — invite-based setup + persona creation + model connect.

 Routes:
  GET  /setup/{token}      → show password setup form (step 1)
  POST /setup/{token}      → set password, redirect to persona step
  GET  /setup/persona      → show persona creation form (step 2, requires auth)
-  POST /setup/persona      → create persona, redirect to /{user}/{persona}
+  POST /setup/persona      → create persona, redirect to /setup/model
+  GET  /setup/model        → OpenRouter quick-connect (step 3, also standalone)
+  POST /setup/model        → save host + model + assign to chat role, redirect to chat
 """

 import logging
@@ -21,6 +23,7 @@ from auth_utils import (
 )
 from persona_template import create_persona
 from persona import list_user_personas, validate as validate_persona
+import model_registry

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/setup")
@@ -114,7 +117,11 @@ async def persona_submit(
        description=description.strip(),
    )
    logger.info("persona created: %s/%s", username, persona_name)
-    return RedirectResponse(f"/{username}/{persona_name}", status_code=302)
+    # Step 3: guided model setup before entering the chat
+    resp = RedirectResponse("/setup/model", status_code=302)
+    # Remember which persona to land on after model setup
+    resp.set_cookie("cx_setup_persona", f"{username}/{persona_name}", max_age=3600, httponly=True, samesite="lax")
+    return resp


 # ---------------------------------------------------------------------------
@@ -178,3 +185,126 @@ async def setup_submit(
        return resp

    return HTMLResponse(_setup_page("Unknown step."), status_code=400)
+
+
+# ---------------------------------------------------------------------------
+# Step 3 — model connect (OpenRouter quick-connect, also standalone)
+# ---------------------------------------------------------------------------
+
+# Curated (model_id, "Name — blurb") pairs for Step 3; model_submit derives the saved label from these.
+_OPENROUTER_MODELS = [
+    ("anthropic/claude-3-5-haiku-20241022",  "Claude 3.5 Haiku — Fast & affordable"),
+    ("anthropic/claude-3-7-sonnet-20250219", "Claude 3.7 Sonnet — Smarter Claude"),
+    ("google/gemini-2.0-flash-001",          "Gemini 2.0 Flash — Fast Google model"),
+    ("meta-llama/llama-3.3-70b-instruct",    "Llama 3.3 70B — Open source"),
+]
+
+
+def _model_page(error: str = "", from_setup: bool = False) -> str:
+    """Return setup.html with step 1 hidden and step 3 shown, filling the label/error placeholders."""
+    page = (_STATIC / "setup.html").read_text()
+    # Step 2 needs no edit: the old replace swapped its hidden <div> for an identical string (a no-op).
+    page = page.replace('<div id="step-password">', '<div id="step-password" style="display:none">')
+    page = page.replace('<div id="step-model" style="display:none">', '<div id="step-model">')
+    if from_setup:
+        page = page.replace("<!-- SETUP_STEP3_LABEL -->", "Step 3 of 3")
+    if error:  # NOTE: inserted as raw HTML — pass only fixed, server-authored messages
+        page = page.replace("<!-- ERROR_MODEL -->", f'<p class="error">{error}</p>')
+    return page
+
+
+@router.post("/model/skip", include_in_schema=False)
+async def model_skip(request: Request):
+    """Skip model setup: 302 to the remembered persona, else the user root, else "/"."""
+    from auth_utils import decode_token
+    import jwt
+    token = request.cookies.get(COOKIE_NAME)
+    username = None
+    if token:
+        try:
+            username = decode_token(token)
+        except jwt.InvalidTokenError:
+            pass  # best-effort: an invalid token only removes the user-root fallback
+    # Client-supplied cookie: strip leading "/" and "\" so the target can't become "//host" (off-site).
+    dest_cookie = request.cookies.get("cx_setup_persona", "").lstrip("/\\")
+    dest = f"/{dest_cookie}" if dest_cookie else (f"/{username}" if username else "/")
+    resp = RedirectResponse(dest, status_code=302)
+    resp.delete_cookie("cx_setup_persona")
+    return resp
+
+
+@router.get("/model", include_in_schema=False)
+async def model_page(request: Request):
+    """Show the model-connect page; send visitors without a valid session to /login."""
+    from auth_utils import decode_token
+    import jwt
+    session_token = request.cookies.get(COOKIE_NAME)
+    if not session_token:
+        return RedirectResponse("/login", status_code=302)
+    try:
+        decode_token(session_token)  # called only to validate; the username is not needed here
+    except jwt.InvalidTokenError:
+        return RedirectResponse("/login", status_code=302)
+    # A cx_setup_persona cookie means the visitor arrived from the onboarding flow.
+    return HTMLResponse(_model_page(from_setup=bool(request.cookies.get("cx_setup_persona"))))
+
+
+@router.post("/model", include_in_schema=False)
+async def model_submit(
+    request: Request,
+    api_key: str = Form(...),
+    model_name: str = Form(...),
+):
+    """Save an OpenRouter host + model for the signed-in user, make it the chat primary, then redirect to chat."""
+    from auth_utils import decode_token
+    import jwt
+    token = request.cookies.get(COOKIE_NAME)
+    if not token:
+        return RedirectResponse("/login", status_code=302)
+    try:
+        username = decode_token(token)
+    except jwt.InvalidTokenError:
+        return RedirectResponse("/login", status_code=302)
+    api_key = api_key.strip()
+    model_name = model_name.strip()
+    # Form(...) only guarantees the fields were sent; whitespace-only values are rejected here.
+    missing = "API key is required." if not api_key else ("" if model_name else "Model is required.")
+    if missing:
+        from_setup = bool(request.cookies.get("cx_setup_persona"))
+        return HTMLResponse(_model_page(missing, from_setup=from_setup), status_code=422)
+
+    # Save OpenRouter as a host
+    host_id = model_registry.save_host(
+        username=username,
+        host_id=None,
+        label="OpenRouter",
+        api_url="https://openrouter.ai/api/v1",
+        api_key=api_key,
+        host_type="openai",
+    )
+
+    # Find label for selected model
+    label = next((lbl for mn, lbl in _OPENROUTER_MODELS if mn == model_name), model_name)
+    label = label.split(" — ")[0]  # keep just the model name part
+
+    # Save model entry
+    mid = model_registry.save_model(
+        username=username,
+        model_id=None,
+        host_id=host_id,
+        label=label,
+        model_name=model_name,
+        context_k=128,
+        tools=True,
+    )
+
+    # Assign as chat role primary
+    model_registry.set_role(username, "chat", "primary", mid)
+    logger.info("openrouter setup complete: %s → %s", username, model_name)
+    # Redirect to chat (use remembered persona, or user root). The cookie is client-supplied:
+    # leading "/" and "\" are stripped so the target can't become "//host" (off-site).
+    dest_cookie = request.cookies.get("cx_setup_persona", "").lstrip("/\\")
+    dest = f"/{dest_cookie}" if dest_cookie else f"/{username}"
+    resp = RedirectResponse(dest, status_code=302)
+    resp.delete_cookie("cx_setup_persona")
+    return resp