feat: local LLM multi-model, session search, cron proactive types, notifications, docs overhaul

Local LLM:
- user_settings.py: per-user hosts/models config (local_llm.json)
- routers/local_llm.py + static/local_llm.html: dedicated settings page
- llm_client.py: local OpenAI-compatible backend via httpx
- config.py: LOCAL_API_URL/KEY/MODEL + per-backend timeouts
- Active model shown near backend toggle (amber hint text)

Memory distillation:
- memory_distiller.py: DISTILL_BACKEND_MID/LONG .env overrides
- scheduler.py + notification.py: notify NC Talk after mid/long distill
- notification.py: outbound channel abstraction (NC Talk, extensible)

Session search:
- routers/files.py: GET /sessions/search?q= with excerpts grouped by date
- static/index.html + app.js: search UI in file sidebar with highlight
- _esc() helper to prevent XSS in search results

Proactive cron:
- cron_runner.py: new job types — message (send directly) and brief (LLM + send)
- Both support optional per-job channel override

Channels:
- routers/nextcloud_talk.py: consolidated using notification._send_nct_message()
- routers/auth.py: local backend status in /auth/status
- routers/chat.py: /backend returns {primary, fallback, local_model} object

UI / UX:
- Copy button for user messages (matching assistant)
- Autocomplete disabled on sensitive form fields
- settings.html: local model section replaced with link to /settings/local

Docs overhaul:
- MASTER.md hub + ARCH__SYSTEM/BACKENDS/PERSONA/CHANNELS/FUTURE.md
- ARCH__Intelligence_Layer.md replaced with redirect table
- CORTEX.md trimmed to vision only; README updated
- OPEN_WEBUI_API.md added to docs/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 20:53:06 -04:00
parent bd6532e93a
commit a4daebdc9b
33 changed files with 2985 additions and 486 deletions
--- a/cortex/.env.example
+++ b/cortex/.env.example
@@ -52,12 +52,21 @@ NEXTCLOUD_URL=https://cloud.dgrzone.com
 NEXTCLOUD_TALK_BOT_SECRET=

 # ── LLM backends ────────────────────────────────────────────────────────────
-# Primary backend: "claude" or "gemini" (other is always fallback)
+# Primary backend: "claude", "gemini", or "local" (switchable at runtime via UI)
 PRIMARY_BACKEND=claude

 # Timeouts in seconds
 TIMEOUT_CLAUDE=60
 TIMEOUT_GEMINI=120
+TIMEOUT_LOCAL=300   # local models may need time to load
+
+# ── Local model (Open WebUI / Ollama — OpenAI-compatible API) ────────────────
+# Leave LOCAL_API_URL blank to disable. When set, "local" appears as a backend option.
+# API key: Open WebUI → Settings → Account → API Keys
+# Model: workspace alias or full Ollama model name
+LOCAL_API_URL=http://192.168.32.19:3000
+LOCAL_API_KEY=
+LOCAL_MODEL=test-agent-simple

 # ── Orchestrator (Gemini API — not Gemini CLI) ───────────────────────────────
 # Required for /orchestrate endpoint and tool use
--- a/cortex/config.py
+++ b/cortex/config.py
@@ -40,6 +40,12 @@ class Settings(BaseSettings):
    max_history_messages: int = 40  # rolling window — 20 turns (user + assistant)
    primary_backend: str = "claude"  # "claude" or "gemini" — other is always fallback

+    # Local model backend — OpenAI-compatible API (Open WebUI / Ollama)
+    # Set LOCAL_API_URL in .env to enable; leave blank to disable
+    local_api_url: str = ""            # e.g. http://192.168.32.19:3000
+    local_api_key: str = ""            # sk-... from Open WebUI → Settings → Account → API Keys
+    local_model: str = ""              # workspace or model name, e.g. test-agent-simple
+
    # Per-backend timeouts in seconds
    timeout_claude: int = 60
    timeout_gemini: int = 120   # frequently slow under load
@@ -53,6 +59,12 @@ class Settings(BaseSettings):
    auto_distill_mid: bool = True     # weekly Sunday at 03:30 — LLM summarizes short → mid
    auto_distill_long: bool = False   # monthly 1st at 04:00 — off by default (manual review recommended)

+    # Which backend to use for distillation LLM calls.
+    # "" = use primary_backend (default); "local" = use local model (saves API credits).
+    # Recommended: leave distill_backend_long empty so long-term distillation keeps the default backend (best quality).
+    distill_backend_mid: str = ""
+    distill_backend_long: str = ""
+
    # Memory tier token budgets — soft caps used during distillation
    # Override in .env: MEMORY_BUDGET_LONG=4000 etc.
    memory_budget_long: int = 2000
--- a/cortex/cron_runner.py
+++ b/cortex/cron_runner.py
@@ -10,16 +10,20 @@ Job schema:
    "id":         "c_abc123",
    "label":      "Human-readable name",
    "schedule":   "daily:09:00",   # see parse_schedule() for all formats
-    "type":       "remind" | "note",
-    "payload":    "Text to write when the job fires",
+    "type":       "remind" | "note" | "message" | "brief",
+    "payload":    "Text or prompt when the job fires",
+    "channel":    null | "nextcloud" | "google_chat",  # message/brief only; null = user's default channel ("google_chat" outbound is not implemented yet)
    "enabled":    true,
    "created_at": "ISO 8601",
    "last_run":   null | "ISO 8601"
  }

 Job types:
-  remind  → appends to inara/REMINDERS.md  (auto-loaded into Inara's context)
-  note    → appends to inara/SCRATCH.md    (read on demand via scratch_read)
+  remind   → appends to REMINDERS.md  (auto-loaded into context at tier 2+)
+  note     → appends to SCRATCH.md    (read on demand via scratch_read)
+  message  → sends payload as-is to NC Talk notification_room
+  brief    → runs LLM with payload as the prompt, sends response to NC Talk
+             (good for morning briefings, summaries, proactive check-ins)
 """

 import logging
@@ -150,6 +154,39 @@ async def run_job(job: dict) -> None:
        p.write_text(existing.rstrip() + "\n" + section)
        logger.info("cron [note] fired: %s", label)

+    elif job_type == "message":
+        # Send payload text directly to the user's notification channel
+        from notification import notify
+        username = job.get("user") or "scott"
+        channel  = job.get("channel") or None
+        await notify(username, payload, channel=channel)
+        logger.info("cron [message] sent: %s", label)
+
+    elif job_type == "brief":
+        # Run LLM with payload as the prompt, send response to notification channel.
+        # Great for morning briefings, reminders, proactive check-ins.
+        from context_loader import load_context
+        from llm_client import complete
+        from notification import notify
+        from persona import set_context
+        from config import settings as _s
+
+        username   = job.get("user") or _s.user_name.lower()
+        persona_nm = job.get("persona") or _s.agent_name.lower()
+        channel    = job.get("channel") or None
+        set_context(username, persona_nm)
+
+        system_prompt = load_context(2)  # tier 2: identity + memory + user profile
+        try:
+            response_text, backend = await complete(
+                system_prompt=system_prompt,
+                messages=[{"role": "user", "content": payload}],
+            )
+            await notify(username, response_text, channel=channel)
+            logger.info("cron [brief] sent via %s: %s", backend, label)
+        except Exception as e:
+            logger.error("cron [brief] LLM error for %s: %s", label, e)
+
    else:
        logger.warning("cron: unknown type %r (job %s)", job_type, job.get("id"))
        return
--- a/cortex/llm_client.py
+++ b/cortex/llm_client.py
@@ -31,6 +31,10 @@ async def cleanup() -> None:
    _active_pgroups.clear()


+_BACKENDS = ("claude", "gemini", "local")
+_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
+
+
 async def complete(
    system_prompt: str,
    messages: list[dict],
@@ -38,12 +42,12 @@ async def complete(
    max_tokens: int = 2048,
 ) -> tuple[str, str]:
    """Returns (response_text, actual_backend_used)."""
-    if model in ("claude", "gemini"):
+    if model in _BACKENDS:
        primary = model
    else:
        primary = settings.primary_backend

-    fallback = "gemini" if primary == "claude" else "claude"
+    fallback = _FALLBACK.get(primary, "claude")

    try:
        response = await _dispatch(primary, system_prompt, messages, model)
@@ -65,6 +69,8 @@ async def _dispatch(
 ) -> str:
    if backend == "gemini":
        return await _gemini(system_prompt, messages)
+    if backend == "local":
+        return await _local(system_prompt, messages)
    return await _claude(system_prompt, messages, model)


@@ -108,6 +114,54 @@ async def _claude(system_prompt: str, messages: list[dict], model: str | None) -
    return await _run(cmd, timeout=settings.timeout_claude, env=env)


+async def _local(system_prompt: str, messages: list[dict]) -> str:
+    """OpenAI-compatible backend — Open WebUI / Ollama.
+
+    Per-user config (home/{user}/local_llm.json) takes precedence over
+    the server-level .env defaults.
+    """
+    import httpx
+    from persona import _user
+    from user_settings import get_active_local_model
+
+    cfg = get_active_local_model(_user.get())
+    if not cfg:
+        raise RuntimeError("No local model configured — add one at /settings/local")
+
+    api_url = cfg["api_url"]
+    api_key = cfg["api_key"]
+    model   = cfg["model_name"]
+
+    if not api_url:
+        raise RuntimeError("local_api_url not configured — set LOCAL_API_URL in .env or add a host at /settings/local")
+    if not model:
+        raise RuntimeError("local_model not configured — add a model at /settings/local")
+
+    logger.info("local backend: %s @ %s", model, api_url)
+
+    msgs: list[dict] = []
+    if system_prompt:
+        msgs.append({"role": "system", "content": system_prompt})
+    msgs.extend(messages)
+
+    url = api_url.rstrip("/") + "/api/chat/completions"
+    headers: dict[str, str] = {}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    payload = {"model": model, "messages": msgs}
+
+    async with httpx.AsyncClient(timeout=settings.timeout_local) as client:
+        resp = await client.post(url, json=payload, headers=headers)
+        resp.raise_for_status()
+        data = resp.json()
+
+    text = data["choices"][0]["message"]["content"]
+    if not text or not text.strip():
+        raise RuntimeError("Local model returned an empty response")
+    return text.strip()
+
+
 async def _gemini(system_prompt: str, messages: list[dict]) -> str:
    # Gemini CLI spawns MCP child processes that keep stdout pipes open after responding.
    # start_new_session=True puts the whole tree in its own process group so
--- a/cortex/main.py
+++ b/cortex/main.py
@@ -9,7 +9,7 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s: %(messag
 from config import settings
 from auth_middleware import SessionAuthMiddleware
 from routers import chat, google_chat, nextcloud_talk, files, distill, auth, orchestrator
-from routers import ui, onboarding, settings, help, auth_google
+from routers import ui, onboarding, settings, help, auth_google, local_llm


@asynccontextmanager
@@ -47,6 +47,7 @@ app.include_router(onboarding.router)

 # Account settings
 app.include_router(settings.router)
+app.include_router(local_llm.router)

 # Help page
 app.include_router(help.router)
--- a/cortex/memory_distiller.py
+++ b/cortex/memory_distiller.py
@@ -77,15 +77,22 @@ def distill_short(username: str | None = None, persona: str | None = None) -> di
 async def distill_mid(username: str | None = None, persona: str | None = None) -> dict:
    """
    Ask the LLM to summarize MEMORY_SHORT.md → MEMORY_MID.md.
+    Uses DISTILL_BACKEND_MID if set (e.g. "local"), otherwise primary_backend.
    """
    from llm_client import complete
+    from persona import set_context

-    inara_dir = _persona_path(username, persona)
+    u = username or settings.user_name.lower()
+    p = persona or settings.agent_name.lower()
+    set_context(u, p)
+
+    inara_dir = _persona_path(u, p)
    short_content = _read(inara_dir / "MEMORY_SHORT.md")

    if not short_content.strip() or "Not yet populated" in short_content:
        return {"error": "MEMORY_SHORT.md is empty — run distill/short first"}

+    backend_override = settings.distill_backend_mid or None
    budget_tokens = settings.memory_budget_mid
    system_prompt = (
        f"You are {settings.agent_name}'s memory distillation system. "
@@ -100,6 +107,7 @@ async def distill_mid(username: str | None = None, persona: str | None = None) -
    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": short_content}],
+        model=backend_override,
    )

    now = datetime.now().strftime("%Y-%m-%d %H:%M")
@@ -112,6 +120,7 @@ async def distill_mid(username: str | None = None, persona: str | None = None) -
    logger.info("distill_mid: wrote %d chars via %s", len(header) + len(response_text), backend)

    return {
+        "username": u,
        "backend": backend,
        "chars_written": len(header) + len(response_text),
        "budget_tokens": budget_tokens,
@@ -121,16 +130,23 @@ async def distill_mid(username: str | None = None, persona: str | None = None) -
 async def distill_long(username: str | None = None, persona: str | None = None) -> dict:
    """
    Ask the LLM to integrate MEMORY_MID.md into MEMORY_LONG.md.
+    Uses DISTILL_BACKEND_LONG if set, otherwise primary_backend.
    """
    from llm_client import complete
+    from persona import set_context

-    inara_dir = _persona_path(username, persona)
+    u = username or settings.user_name.lower()
+    p = persona or settings.agent_name.lower()
+    set_context(u, p)
+
+    inara_dir = _persona_path(u, p)
    long_content = _read(inara_dir / "MEMORY_LONG.md")
    mid_content = _read(inara_dir / "MEMORY_MID.md")

    if not mid_content.strip() or "Not yet populated" in mid_content:
        return {"error": "MEMORY_MID.md is empty — run distill/mid first"}

+    backend_override = settings.distill_backend_long or None
    budget_tokens = settings.memory_budget_long
    system_prompt = (
        f"You are {settings.agent_name}'s long-term memory curator. "
@@ -149,6 +165,7 @@ async def distill_long(username: str | None = None, persona: str | None = None)
    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": user_content}],
+        model=backend_override,
    )

    # Ensure the file has the right header if the LLM dropped it
@@ -165,6 +182,7 @@ async def distill_long(username: str | None = None, persona: str | None = None)
    logger.info("distill_long: wrote %d chars via %s", len(response_text), backend)

    return {
+        "username": u,
        "backend": backend,
        "chars_written": len(response_text),
        "budget_tokens": budget_tokens,
--- a/cortex/notification.py
+++ b/cortex/notification.py
@@ -0,0 +1,106 @@
+"""
+Outbound notification helpers — send messages to user channels proactively.
+
+Channel config lives in home/{user}/channels.json.
+The top-level notification_channel key names the channel section to use for
+proactive notifications (e.g. "nextcloud"; other channels are not implemented yet):
+  {
+    "notification_channel": "nextcloud",
+    "nextcloud": {
+      "url": "https://cloud.example.com",
+      "bot_secret": "...",
+      "notification_room": "<room-token>",
+      ...
+    }
+  }
+
+If notification_channel is absent, defaults to "nextcloud" if configured.
+If notification_room (for NCT) is absent, notifications are silently skipped.
+"""
+import hashlib
+import hmac
+import json
+import logging
+import secrets
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+async def _send_nct_message(url: str, secret: str, room: str, message: str) -> None:
+    """Post a message to a Nextcloud Talk room as the bot."""
+    endpoint = f"{url}/ocs/v2.php/apps/spreed/api/v1/bot/{room}/message"
+    random_str = secrets.token_hex(32)
+    sig = hmac.new(
+        secret.encode(),
+        (random_str + message).encode("utf-8"),
+        hashlib.sha256,
+    ).hexdigest()
+    body = json.dumps({"message": message}, ensure_ascii=False).encode("utf-8")
+
+    try:
+        async with httpx.AsyncClient() as client:
+            resp = await client.post(
+                endpoint,
+                content=body,
+                headers={
+                    "Content-Type": "application/json",
+                    "OCS-APIRequest": "true",
+                    "X-Nextcloud-Talk-Bot-Random": random_str,
+                    "X-Nextcloud-Talk-Bot-Signature": sig,
+                },
+                timeout=15,
+            )
+        if resp.status_code not in (200, 201):
+            logger.warning("notify NCT %s → HTTP %d: %s", room, resp.status_code, resp.text[:200])
+        else:
+            logger.info("notify NCT → %s (%d chars)", room, len(message))
+    except Exception as e:
+        logger.error("notify NCT error: %s", e)
+
+
+async def _notify_nct(nct: dict, message: str, username: str) -> None:
+    room   = nct.get("notification_room", "").strip()
+    url    = nct.get("url", "").rstrip("/")
+    secret = nct.get("bot_secret", "")
+    if not room:
+        logger.debug("notify: NCT notification_room not set for %s — skipping", username)
+        return
+    if not url or not secret:
+        logger.warning("notify: NCT config incomplete for %s (missing url or secret)", username)
+        return
+    await _send_nct_message(url, secret, room, message)
+
+
+async def notify(username: str, message: str, channel: str | None = None) -> None:
+    """Send a notification to the user's preferred outbound channel.
+
+    Channel resolution order:
+      1. `channel` parameter if provided
+      2. `notification_channel` key in channels.json
+      3. "nextcloud" if configured
+      4. Silent no-op
+
+    To configure: set `notification_channel` in home/{user}/channels.json.
+    For NCT: also set `notification_room` in the nextcloud section.
+    """
+    from auth_utils import get_user_channels
+    channels = get_user_channels(username)
+
+    target = channel or channels.get("notification_channel", "").strip()
+    if not target:
+        # Auto-detect: use nextcloud if configured
+        if "nextcloud" in channels:
+            target = "nextcloud"
+        else:
+            return
+
+    if target == "nextcloud":
+        nct = channels.get("nextcloud")
+        if not nct:
+            logger.debug("notify: nextcloud not configured for %s", username)
+            return
+        await _notify_nct(nct, message, username)
+    else:
+        logger.debug("notify: channel %r not yet supported for outbound (user %s)", target, username)
--- a/cortex/requirements.txt
+++ b/cortex/requirements.txt
@@ -16,5 +16,8 @@ bcrypt>=4.0.0
 PyJWT>=2.8.0
 python-multipart>=0.0.9   # required by FastAPI for Form() data

+# Async HTTP client — used for local OpenAI-compatible backend (Open WebUI / Ollama)
+httpx>=0.27.0
+
 # anthropic SDK not needed — using claude CLI subprocess for auth
 # anthropic>=0.40.0
--- a/cortex/routers/auth.py
+++ b/cortex/routers/auth.py
@@ -13,6 +13,7 @@ import logging
 from datetime import datetime, timezone
 from pathlib import Path
 from fastapi import APIRouter
+from config import settings

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/auth")
@@ -71,9 +72,27 @@ def _gemini_status() -> dict:
        return {"ok": False, "error": str(e), "warning": True, "authenticated": False}


+async def _local_status() -> dict:
+    if not settings.local_api_url:
+        return {"configured": False}
+    try:
+        import httpx
+        url = settings.local_api_url.rstrip("/") + "/api/models"
+        headers = {}
+        if settings.local_api_key:
+            headers["Authorization"] = f"Bearer {settings.local_api_key}"
+        async with httpx.AsyncClient(timeout=5) as client:
+            resp = await client.get(url, headers=headers)
+        reachable = resp.status_code < 400
+        return {"configured": True, "reachable": reachable, "model": settings.local_model}
+    except Exception as e:
+        return {"configured": True, "reachable": False, "error": str(e), "model": settings.local_model}
+
+
@router.get("/status")
 async def auth_status() -> dict:
    return {
        "claude": _claude_status(),
        "gemini": _gemini_status(),
+        "local": await _local_status(),
    }
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -1,6 +1,7 @@
 import asyncio
 import json
-from fastapi import APIRouter, HTTPException, Query
+import jwt
+from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from context_loader import load_context
@@ -9,6 +10,8 @@ from session_logger import log_turn
 from session_store import load as load_session, save as save_session, list_all, generate_session_id, delete as delete_session, rename as rename_session
 from config import settings
 from persona import set_context, validate as validate_persona
+from auth_utils import COOKIE_NAME, decode_token
+import user_settings
 import event_bus


@@ -29,7 +32,7 @@ class ChatRequest(BaseModel):


 class BackendRequest(BaseModel):
-    primary: str  # "claude" or "gemini"
+    primary: str  # "claude", "gemini", or "local"


 class NoteRequest(BaseModel):
@@ -130,19 +133,45 @@ async def chat(req: ChatRequest) -> StreamingResponse:
    )


+_BACKEND_CYCLE = ("claude", "gemini", "local")
+_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
+
+
+def _local_model_info(request: Request) -> dict | None:
+    """Return active local model {label, model_name} for the session user, or None."""
+    try:
+        token    = request.cookies.get(COOKIE_NAME)
+        username = decode_token(token) if token else None
+        if not username:
+            return None
+        cfg = user_settings.get_active_local_model(username)
+        if cfg:
+            return {"label": cfg["label"], "model_name": cfg["model_name"]}
+    except (jwt.InvalidTokenError, Exception):
+        pass
+    return None
+
+
@router.get("/backend")
-async def get_backend() -> dict:
-    other = "gemini" if settings.primary_backend == "claude" else "claude"
-    return {"primary": settings.primary_backend, "fallback": other}
+async def get_backend(request: Request) -> dict:
+    p = settings.primary_backend
+    return {
+        "primary":      p,
+        "fallback":     _BACKEND_FALLBACK.get(p, "claude"),
+        "local_model":  _local_model_info(request),
+    }


@router.post("/backend")
-async def set_backend(req: BackendRequest) -> dict:
-    if req.primary not in ("claude", "gemini"):
-        raise HTTPException(status_code=400, detail="primary must be 'claude' or 'gemini'")
+async def set_backend(req: BackendRequest, request: Request) -> dict:
+    if req.primary not in _BACKEND_CYCLE:
+        raise HTTPException(status_code=400, detail="primary must be 'claude', 'gemini', or 'local'")
    settings.primary_backend = req.primary
-    other = "gemini" if req.primary == "claude" else "claude"
-    return {"primary": settings.primary_backend, "fallback": other}
+    return {
+        "primary":     req.primary,
+        "fallback":    _BACKEND_FALLBACK[req.primary],
+        "local_model": _local_model_info(request),
+    }


 def _set_ctx(user: str, persona: str) -> None:
--- a/cortex/routers/files.py
+++ b/cortex/routers/files.py
@@ -1,7 +1,8 @@
 """
-Read/write the Inara identity markdown files.
+Read/write Inara identity markdown files, and search past session logs.
 Only whitelisted filenames are accessible — no path traversal possible.
 """
+import re
 from fastapi import APIRouter, HTTPException, Query
 from pydantic import BaseModel
 from persona import persona_path, set_context, validate as validate_persona
@@ -47,10 +48,12 @@ async def list_files(
    files = []
    for name in sorted(ALLOWED):
        p = persona_dir / name
+        st = p.stat() if p.exists() else None
        files.append({
            "name": name,
            "exists": p.exists(),
-            "size": p.stat().st_size if p.exists() else 0,
+            "size": st.st_size if st else 0,
+            "modified": st.st_mtime if st else None,
        })
    return {"files": files}

@@ -83,3 +86,59 @@ async def save_file(
    p = _path(filename)
    p.write_text(req.content)
    return {"ok": True, "name": filename, "size": len(req.content)}
+
+
+# ── Session search ────────────────────────────────────────────────────────────
+
+_CONTEXT_CHARS = 120  # chars of context to include around each match
+
+
+@router.get("/sessions/search")
+async def search_sessions(
+    q: str = Query(..., min_length=2),
+    user: str = Query("scott"),
+    persona: str = Query("inara"),
+    limit: int = Query(20, ge=1, le=100),
+) -> dict:
+    """Full-text search across past session logs.
+
+    Returns up to `limit` matches, newest sessions first.
+    Each match includes a short excerpt (120 chars before/after) for context.
+    """
+    _resolve(user, persona)
+    sessions_dir = persona_path() / "sessions"
+    if not sessions_dir.exists():
+        return {"query": q, "matches": [], "total_files_searched": 0}
+
+    pattern = re.compile(re.escape(q), re.IGNORECASE)
+    session_files = sorted(sessions_dir.glob("*.md"), reverse=True)  # newest first
+
+    matches = []
+    for sf in session_files:
+        if len(matches) >= limit:
+            break
+        try:
+            text = sf.read_text()
+        except OSError:
+            continue
+        for m in pattern.finditer(text):
+            if len(matches) >= limit:
+                break
+            start = max(0, m.start() - _CONTEXT_CHARS)
+            end   = min(len(text), m.end() + _CONTEXT_CHARS)
+            excerpt = text[start:end].strip()
+            # Mark each truncated side of the excerpt with an ellipsis
+            if start > 0:
+                excerpt = "…" + excerpt
+            if end < len(text):
+                excerpt = excerpt + "…"
+            matches.append({
+                "date":    sf.stem,          # YYYY-MM-DD
+                "excerpt": excerpt,
+            })
+
+    return {
+        "query":               q,
+        "matches":             matches,
+        "total_files_searched": len(session_files),
+    }
--- a/cortex/routers/local_llm.py
+++ b/cortex/routers/local_llm.py
@@ -0,0 +1,242 @@
+"""
+Local LLM settings — per-user host and model configuration.
+
+Routes:
+  GET  /settings/local                      → settings page
+  POST /settings/local/host                 → save/create host
+  POST /settings/local/models/add           → add model entry
+  POST /settings/local/models/{id}/activate → set active model
+  POST /settings/local/models/{id}/remove   → remove model entry
+  GET  /api/local-llm/fetch-models          → proxy to host /api/models (JSON)
+"""
+import logging
+from pathlib import Path
+
+import httpx
+import jwt
+from fastapi import APIRouter, Form, Request
+from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
+
+from auth_utils import COOKIE_NAME, decode_token
+from config import settings as app_settings
+import user_settings as us
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+_STATIC = Path(__file__).parent.parent / "static"
+
+
+# ── Auth helper ───────────────────────────────────────────────────────────────
+
+def _get_user(request: Request) -> str | None:
+    token = request.cookies.get(COOKIE_NAME)
+    if not token:
+        return None
+    try:
+        return decode_token(token)
+    except jwt.InvalidTokenError:
+        return None
+
+
+# ── Page renderer ─────────────────────────────────────────────────────────────
+
+def _render(username: str, success: str = "", error: str = "") -> str:
+    cfg     = us.get_config(username)
+    hosts   = cfg["hosts"]
+    models  = cfg["models"]
+    active  = cfg.get("active_model_id")
+
+    # Build a host lookup for model rows
+    host_by_id = {h["id"]: h for h in hosts}
+
+    # ── Host section ──────────────────────────────────────────────────────────
+    if hosts:
+        h = hosts[0]   # one host for now
+        host_id_val  = h["id"]
+        host_label   = h.get("label", "")
+        host_url     = h.get("api_url", "")
+        host_key_hint = f"…{h['api_key'][-4:]}" if h.get("api_key") else "not set"
+    else:
+        host_id_val  = ""
+        host_label   = ""
+        host_url     = app_settings.local_api_url
+        host_key_hint = f"server default (…{app_settings.local_api_key[-4:]})" \
+                        if app_settings.local_api_key else "not set"
+
+    # ── Model rows ────────────────────────────────────────────────────────────
+    model_rows = ""
+    for m in models:
+        is_active = m["id"] == active
+        host      = host_by_id.get(m["host_id"], {})
+        host_name = host.get("label") or host.get("api_url") or "unknown host"
+        badge     = '<span class="active-badge">active</span>' if is_active else ""
+        activate_btn = (
+            '<span class="active-label">✓ Active</span>'
+            if is_active else
+            f'''<form method="POST" action="/settings/local/models/{m["id"]}/activate" style="display:inline">
+                  <button type="submit" class="row-btn">Set active</button>
+                </form>'''
+        )
+        model_rows += f'''
+        <div class="model-row{"  model-active" if is_active else ""}">
+          <div class="model-info">
+            <span class="model-label">{m.get("label") or m["model_name"]}</span>{badge}
+            <span class="model-name">{m["model_name"]}</span>
+            <span class="model-host">{host_name}</span>
+          </div>
+          <div class="model-actions">
+            {activate_btn}
+            <form method="POST" action="/settings/local/models/{m["id"]}/remove" style="display:inline"
+                  onsubmit="return confirm('Remove {m.get('label') or m['model_name']}?')">
+              <button type="submit" class="row-btn danger">Remove</button>
+            </form>
+          </div>
+        </div>'''
+
+    if not model_rows:
+        model_rows = '<p class="empty-note">No models added yet. Use "Add Model" below.</p>'
+
+    # ── Host select for Add Model ─────────────────────────────────────────────
+    host_options = "".join(
+        f'<option value="{h["id"]}">{h.get("label") or h["api_url"]}</option>'
+        for h in hosts
+    )
+    add_section_hidden = "" if hosts else ' style="display:none"'
+
+    html = (_STATIC / "local_llm.html").read_text()
+    first_host_id = hosts[0]["id"] if hosts else ""
+
+    html = html.replace("{{ username }}",         username)
+    html = html.replace("{{ host_id }}",          host_id_val)
+    html = html.replace("{{ host_label }}",       host_label)
+    html = html.replace("{{ host_url }}",         host_url)
+    html = html.replace("{{ host_key_hint }}",    host_key_hint)
+    html = html.replace("{{ model_rows }}",       model_rows)
+    html = html.replace("{{ host_options }}",     host_options)
+    html = html.replace("{{ first_host_id }}",    first_host_id)
+    html = html.replace("{{ add_section_hidden }}", add_section_hidden)
+    html = html.replace("{{ has_host }}",         "true" if hosts else "false")
+    if success:
+        html = html.replace("<!-- SUCCESS -->", f'<p class="msg success">{success}</p>')
+    if error:
+        html = html.replace("<!-- ERROR -->",   f'<p class="msg error">{error}</p>')
+    return html
+
+
+# ── Routes ────────────────────────────────────────────────────────────────────
+
+@router.get("/settings/local", include_in_schema=False)
+async def local_llm_page(request: Request):
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+    return HTMLResponse(_render(username))
+
+
+@router.post("/settings/local/host", include_in_schema=False)
+async def save_host(
+    request: Request,
+    host_id:  str = Form(""),
+    label:    str = Form(""),
+    api_url:  str = Form(""),
+    api_key:  str = Form(""),
+):
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    if not api_url.strip():
+        return HTMLResponse(_render(username, error="API URL is required."))
+
+    us.save_host(username, host_id or None, label, api_url, api_key)
+    logger.info("local LLM host saved: %s", username)
+    return HTMLResponse(_render(username, success="Host saved."))
+
+
+@router.post("/settings/local/models/add", include_in_schema=False)
+async def add_model(
+    request:    Request,
+    host_id:    str = Form(...),
+    label:      str = Form(""),
+    model_name: str = Form(...),
+):
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    if not model_name.strip():
+        return HTMLResponse(_render(username, error="Model name is required."))
+
+    us.add_model(username, host_id, label, model_name)
+    logger.info("local model added: %s / %s", username, model_name)
+    return HTMLResponse(_render(username, success=f"Model \"{label or model_name}\" added."))
+
+
+@router.post("/settings/local/models/{model_id}/activate", include_in_schema=False)
+async def activate_model(request: Request, model_id: str):
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    if not us.set_active_model(username, model_id):
+        return HTMLResponse(_render(username, error="Model not found."))
+
+    logger.info("active local model set: %s / %s", username, model_id)
+    return HTMLResponse(_render(username, success="Active model updated."))
+
+
+@router.post("/settings/local/models/{model_id}/remove", include_in_schema=False)
+async def remove_model(request: Request, model_id: str):
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    us.remove_model(username, model_id)
+    logger.info("local model removed: %s / %s", username, model_id)
+    return HTMLResponse(_render(username, success="Model removed."))
+
+
+@router.get("/api/local-llm/fetch-models")
+async def fetch_models(request: Request) -> JSONResponse:
+    """Proxy to the configured host's /api/models endpoint.
+
+    Returns {"models": [{id, name}, ...]} sorted by name, or {"error": ...} with a 400/401/502 status.
+    """
+    username = _get_user(request)
+    if not username:
+        return JSONResponse({"error": "Not authenticated"}, status_code=401)
+
+    cfg   = us.get_config(username)
+    hosts = cfg.get("hosts", [])
+
+    # Fall back to .env if no host configured yet
+    if hosts:
+        h       = hosts[0]
+        api_url = h.get("api_url", "")
+        api_key = h.get("api_key", "")
+    else:
+        api_url = app_settings.local_api_url
+        api_key = app_settings.local_api_key
+
+    if not api_url:
+        return JSONResponse({"error": "No host configured."}, status_code=400)
+
+    url     = api_url.rstrip("/") + "/api/models"
+    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+
+    try:
+        async with httpx.AsyncClient(timeout=8) as client:
+            resp = await client.get(url, headers=headers)
+        resp.raise_for_status()
+        data   = resp.json()
+        models = [
+            {"id": m["id"], "name": m.get("name") or m["id"]}
+            for m in data.get("data", [])
+        ]
+        models.sort(key=lambda m: m["name"].lower())
+        return JSONResponse({"models": models})
+    except httpx.HTTPStatusError as e:
+        return JSONResponse({"error": f"Host returned {e.response.status_code}"}, status_code=502)
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=502)
--- a/cortex/routers/nextcloud_talk.py
+++ b/cortex/routers/nextcloud_talk.py
@@ -1,16 +1,13 @@
 import asyncio
-import hashlib
-import hmac
 import json
 import logging
-import secrets

-import httpx
 from fastapi import APIRouter, BackgroundTasks, HTTPException, Request, Response

 from auth_utils import get_user_channels
 from context_loader import load_context
 from llm_client import complete
+from notification import _send_nct_message
 from persona import set_context
 from session_logger import log_turn
 from session_store import load as load_session, save as save_session
@@ -40,38 +37,8 @@ def _verify_signature(body: bytes, random_header: str, sig_header: str, secret:

 async def _send_reply(conversation_token: str, message: str, nextcloud_url: str, secret: str) -> None:
    """Post a message to Nextcloud Talk as the bot."""
-    url = (
-        f"{nextcloud_url}/ocs/v2.php/apps/spreed/api/v1"
-        f"/bot/{conversation_token}/message"
-    )
-    # NC Talk verifies HMAC over (random + message_text), NOT the raw body.
-    # See BotController::getBotFromHeaders → checksumVerificationService::validateRequest($random, $sig, $secret, $message)
-    body_dict  = {"message": message}
-    body_bytes = json.dumps(body_dict, ensure_ascii=False).encode("utf-8")
-    random_str = secrets.token_hex(32)
-    sig = hmac.new(
-        secret.encode(),
-        (random_str + message).encode("utf-8"),
-        hashlib.sha256,
-    ).hexdigest()
-
-    logger.info("NCT _send_reply → %s (body: %s)", url, body_bytes.decode())
-    try:
-        async with httpx.AsyncClient() as client:
-            resp = await client.post(
-                url,
-                content=body_bytes,
-                headers={
-                    "Content-Type": "application/json",
-                    "OCS-APIRequest": "true",
-                    "X-Nextcloud-Talk-Bot-Random": random_str,
-                    "X-Nextcloud-Talk-Bot-Signature": sig,
-                },
-                timeout=15,
-            )
-        logger.info("NCT reply: %s — %s", resp.status_code, resp.text[:400])
-    except Exception as e:
-        logger.error("NCT reply error: %s", e)
+    logger.info("NCT _send_reply → room %s (%d chars)", conversation_token, len(message))
+    await _send_nct_message(nextcloud_url, secret, conversation_token, message)


 async def _process_message(
--- a/cortex/routers/settings.py
+++ b/cortex/routers/settings.py
@@ -55,6 +55,7 @@ def _settings_page(username: str, personas: list[str], success: str = "", error:
        hint = "Using server key"
    html = html.replace("{{ gemini_key_hint }}", hint)
    html = html.replace("{{ gemini_key_set }}", "true" if gemini_key else "false")
+
    persona_items = "\n".join(
        f'''<li>
          <a href="/{username}/{p}" class="persona-link">{p}</a>
--- a/cortex/scheduler.py
+++ b/cortex/scheduler.py
@@ -30,24 +30,28 @@ async def _run_short() -> None:

 async def _run_mid() -> None:
    from memory_distiller import distill_mid
+    from notification import notify
    try:
        result = await distill_mid()
        if "error" in result:
            logger.warning("auto distill mid skipped: %s", result["error"])
        else:
            logger.info("auto distill mid: %d chars via %s", result["chars_written"], result["backend"])
+            await notify(result["username"], f"📝 Weekly memory digest complete ({result['chars_written']} chars via {result['backend']}).")
    except Exception as e:
        logger.error("auto distill mid failed: %s", e)


 async def _run_long() -> None:
    from memory_distiller import distill_long
+    from notification import notify
    try:
        result = await distill_long()
        if "error" in result:
            logger.warning("auto distill long skipped: %s", result["error"])
        else:
            logger.info("auto distill long: %d chars via %s", result["chars_written"], result["backend"])
+            await notify(result["username"], f"🧠 Monthly long-term memory integration complete ({result['chars_written']} chars via {result['backend']}). Worth a quick review.")
    except Exception as e:
        logger.error("auto distill long failed: %s", e)

--- a/cortex/static/app.js
+++ b/cortex/static/app.js
@@ -16,6 +16,44 @@
        const note_vis_btn_el    = document.getElementById('note-vis-btn');
        const settings_btn_el    = document.getElementById('settings-btn');
        const settings_dd_el     = document.getElementById('settings-dropdown');
+        const sessionsBackdrop   = document.getElementById('sessions-backdrop');
+
+        // ── Close all panels/dropdowns (mutual exclusion) ─────────────
+        function closeAllPanels() {
+            if (mode_dropdown_el)  mode_dropdown_el.classList.remove('open');
+            if (settings_dd_el)    settings_dd_el.classList.remove('open');
+            if (sessionsPanel)     { sessionsPanel.classList.remove('open'); sessionsBackdrop.classList.remove('open'); }
+            const pd = document.getElementById('persona-dropdown');
+            if (pd) pd.classList.remove('open');
+        }
+
+        // ── Toasts ────────────────────────────────────────────────────
+        const toastContainer = document.getElementById('toast-container');
+
+        function showToast(message, type = 'info', duration = 2500) {
+            const el = document.createElement('div');
+            el.className = 'toast' + (type !== 'info' ? ' ' + type : '');
+            el.textContent = message;
+            toastContainer.appendChild(el);
+            requestAnimationFrame(() => {
+                requestAnimationFrame(() => el.classList.add('show'));
+            });
+            setTimeout(() => {
+                el.classList.remove('show');
+                el.addEventListener('transitionend', () => el.remove(), { once: true });
+            }, duration);
+        }
+
+        // ── Syntax highlighting ───────────────────────────────────────
+        function highlight_code(container) {  // highlight every `pre code` element in container; no-op when hljs is not loaded
+            if (typeof hljs === 'undefined') return;
+            for (const block of container.querySelectorAll('pre code')) hljs.highlightElement(block);
+        }
+
+        // ── Utility helpers ───────────────────────────────────────────
+        function _esc(s) {
+            return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
+        }

        // ── Lucide icon helpers ───────────────────────────────────────
        function icon_html(name, size = 16) {
@@ -145,6 +183,7 @@
        }

        function open_mode_dropdown() {
+            closeAllPanels();
            // Build options in MRU order (least recent at top, most recent at bottom)
            // — bottom is visually closest to the button since dropdown opens upward
            const ordered = [...mode_mru].reverse();
@@ -236,7 +275,9 @@
        // ── Settings dropdown ─────────────────────────────────────────
        settings_btn_el.addEventListener('click', (e) => {
            e.stopPropagation();
-            settings_dd_el.classList.toggle('open');
+            const isOpen = settings_dd_el.classList.contains('open');
+            closeAllPanels();
+            if (!isOpen) settings_dd_el.classList.add('open');
        });
        document.addEventListener('click', (e) => {
            if (!settings_dd_el.contains(e.target) && e.target !== settings_btn_el) {
@@ -290,7 +331,9 @@
        if (personaSwitcher) {
            personaSwitcher.addEventListener('click', (e) => {
                if (personaDropEl.children.length === 0) return;
-                personaDropEl.classList.toggle('open');
+                const isOpen = personaDropEl.classList.contains('open');
+                closeAllPanels();
+                if (!isOpen) personaDropEl.classList.add('open');
                e.stopPropagation();
            });
            document.addEventListener('click', () => personaDropEl.classList.remove('open'));
@@ -298,23 +341,40 @@

        // ── Backend toggle ───────────────────────────────────────────

-        fetch('/backend').then(r => r.json()).then(d => setBackendUI(d.primary));
+        fetch('/backend').then(r => r.json()).then(d => setBackendUI(d));

-        function setBackendUI(backend) {
+        const BACKEND_CYCLE = ['claude', 'gemini', 'local'];
+        const BACKEND_CLASS = { claude: '', gemini: 'mem-on', local: 'local-on' };
+        const backendModelHint = document.getElementById('backend-model-hint');
+
+        function setBackendUI(d) {
+            const backend = d.primary || d;  // accept full response obj or bare string
            primaryBackend = backend;
            backendToggle.textContent = backend;
-            backendToggle.className = 'ctx-btn' + (backend === 'gemini' ? ' mem-on' : '');
+            const extra = BACKEND_CLASS[backend] || '';
+            backendToggle.className = 'ctx-btn' + (extra ? ' ' + extra : '');
+
+            if (backendModelHint) {
+                if (backend === 'local' && d.local_model) {
+                    backendModelHint.textContent = d.local_model.label || d.local_model.model_name;
+                    backendModelHint.style.display = '';
+                } else {
+                    backendModelHint.textContent = '';
+                    backendModelHint.style.display = 'none';
+                }
+            }
        }

        backendToggle.addEventListener('click', async () => {
-            const next = primaryBackend === 'claude' ? 'gemini' : 'claude';
+            const idx = BACKEND_CYCLE.indexOf(primaryBackend);
+            const next = BACKEND_CYCLE[(idx + 1) % BACKEND_CYCLE.length];
            const res = await fetch('/backend', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ primary: next }),
            });
            const d = await res.json();
-            setBackendUI(d.primary);
+            setBackendUI(d);
            addMessage('system', `Backend: ${d.primary} (fallback: ${d.fallback})`);
        });

@@ -324,17 +384,26 @@
            e.stopPropagation();
            if (sessionsPanel.classList.contains('open')) {
                sessionsPanel.classList.remove('open');
+                sessionsBackdrop.classList.remove('open');
                return;
            }
+            closeAllPanels();
            const res = await fetch(`/sessions?${_fileParams}`);
            const data = await res.json();
            renderPanel(data.sessions);
            sessionsPanel.classList.add('open');
+            sessionsBackdrop.classList.add('open');
+        });
+
+        sessionsBackdrop.addEventListener('click', () => {
+            sessionsPanel.classList.remove('open');
+            sessionsBackdrop.classList.remove('open');
        });

        document.addEventListener('click', (e) => {
            if (!sessionsPanel.contains(e.target) && e.target !== sessionsBtn) {
                sessionsPanel.classList.remove('open');
+                sessionsBackdrop.classList.remove('open');
            }
        });

@@ -354,6 +423,7 @@
                sessionEl.textContent = '';
                addMessage('system', 'New session');
                sessionsPanel.classList.remove('open');
+                sessionsBackdrop.classList.remove('open');
                inputEl.focus();
            });
            sessionsPanel.appendChild(newItem);
@@ -408,6 +478,7 @@
                        if (sessionId === s.session_id) {
                            sessionEl.textContent = `session: ${newName || s.session_id}`;
                        }
+                        if (newName) showToast('Session renamed', 'success');
                    }

                    input.addEventListener('keydown', (e) => {
@@ -431,7 +502,7 @@
                        currentHistory = [];
                        messagesEl.innerHTML = '';
                        sessionEl.textContent = '';
-                        addMessage('system', 'Session deleted');
+                        showToast('Session deleted');
                    }
                    const res = await fetch(`/sessions?${_fileParams}`);
                    const data = await res.json();
@@ -484,6 +555,7 @@
            if (!silent) addMessage('system', `Resumed session ${id}`);
            scrollToBottom();
            sessionsPanel.classList.remove('open');
+            sessionsBackdrop.classList.remove('open');
            inputEl.focus();
            persist_session();
        }
@@ -529,6 +601,7 @@
            if (role === 'assistant' && typeof marked !== 'undefined') {
                div.dataset.raw = text;
                div.innerHTML = marked.parse(text);
+                highlight_code(div);
                div.querySelectorAll('a').forEach(a => {
                    a.target = '_blank';
                    a.rel = 'noopener noreferrer';
@@ -544,7 +617,9 @@
                div.appendChild(label);
                div.appendChild(content);
            } else {
+                div.dataset.raw = text;
                div.textContent = text;
+                div.appendChild(makeCopyBtn(div));
            }

            // Wrap user/assistant messages so action buttons can be attached
@@ -699,6 +774,7 @@
            if (role === 'assistant' && typeof marked !== 'undefined') {
                div.dataset.raw = text;
                div.innerHTML = marked.parse(text);
+                highlight_code(div);
                div.querySelectorAll('a').forEach(a => {
                    a.target = '_blank';
                    a.rel = 'noopener noreferrer';
@@ -709,6 +785,76 @@
            }
        }

+        // ── Agent tool-call step cards ────────────────────────────────
+        function renderToolCalls(toolCalls, beforeEl) {
+            if (!toolCalls || toolCalls.length === 0) return;
+
+            const container = document.createElement('div');
+            container.className = 'tool-calls-container';
+
+            for (const tc of toolCalls) {
+                const details = document.createElement('details');
+                details.className = 'tool-call';
+
+                // Summary: name + first arg value snippet
+                const args    = tc.args || {};
+                const argKeys = Object.keys(args);
+                let argSnippet = '';
+                if (argKeys.length > 0) {
+                    const firstVal = String(args[argKeys[0]]);
+                    argSnippet = firstVal.length > 60 ? firstVal.slice(0, 60) + '…' : firstVal;
+                }
+
+                const summary = document.createElement('summary');
+                const nameSpan = document.createElement('span');
+                nameSpan.className = 'tc-name';
+                nameSpan.textContent = tc.tool;
+                summary.appendChild(nameSpan);
+                if (argSnippet) {
+                    const snippetSpan = document.createElement('span');
+                    snippetSpan.className = 'tc-snippet';
+                    snippetSpan.textContent = argSnippet;
+                    summary.appendChild(snippetSpan);
+                }
+                details.appendChild(summary);
+
+                // Expanded body
+                const body = document.createElement('div');
+                body.className = 'tc-body';
+
+                if (argKeys.length > 0) {
+                    const sec = document.createElement('div');
+                    sec.className = 'tc-section';
+                    const lbl = document.createElement('span');
+                    lbl.className = 'tc-label';
+                    lbl.textContent = 'args';
+                    const pre = document.createElement('pre');
+                    pre.textContent = JSON.stringify(args, null, 2);
+                    sec.appendChild(lbl);
+                    sec.appendChild(pre);
+                    body.appendChild(sec);
+                }
+
+                const resultStr  = tc.result || '';
+                const truncated  = resultStr.length > 400;
+                const sec2 = document.createElement('div');
+                sec2.className = 'tc-section';
+                const lbl2 = document.createElement('span');
+                lbl2.className = 'tc-label';
+                lbl2.textContent = 'result';
+                const pre2 = document.createElement('pre');
+                pre2.textContent = truncated ? resultStr.slice(0, 400) + '\n…[truncated]' : resultStr;
+                sec2.appendChild(lbl2);
+                sec2.appendChild(pre2);
+                body.appendChild(sec2);
+
+                details.appendChild(body);
+                container.appendChild(details);
+            }
+
+            beforeEl.parentElement.insertBefore(container, beforeEl);
+        }
+
        function makeCopyBtn(div) {
            const btn = document.createElement('button');
            btn.className = 'copy-btn';
@@ -722,6 +868,7 @@
                } else {
                    fallbackCopy(text);
                }
+                showToast('Copied to clipboard', 'success', 1800);
                btn.innerHTML = icon_html('check', 12) + ' copied';
                render_icons();
                btn.classList.add('copied');
@@ -762,7 +909,7 @@
                });
                if (!res.ok) throw new Error(`HTTP ${res.status}`);
            } catch (err) {
-                addMessage('system', `Note save failed: ${err.message}`);
+                showToast(`Note save failed: ${err.message}`, 'error');
            }
        }

@@ -944,11 +1091,7 @@
                currentHistory.push({ role: 'assistant', content: job.response || '' });
                attachHistoryControls(thinkingDiv, assistHistIdx);

-                const n = job.tool_calls?.length || 0;
-                if (n) {
-                    const names = job.tool_calls.map(t => t.name).join(', ');
-                    addMessage('system', `⚡ ${n} tool call${n !== 1 ? 's' : ''}: ${names}`);
-                }
+                renderToolCalls(job.tool_calls, thinkingDiv.parentElement);

            } catch (err) {
                if (err.name === 'AbortError') {
@@ -989,17 +1132,94 @@

        // ── File editor ──────────────────────────────────────────────
        const fileModal      = document.getElementById('file-modal');
-        const fileSelect     = document.getElementById('file-select');
+        const fileSidebar    = document.getElementById('file-sidebar');
        const fileEditor     = document.getElementById('file-editor');
        const filePreview    = document.getElementById('file-preview');
        const fileRawBtn     = document.getElementById('file-raw-btn');
        const filePreviewBtn = document.getElementById('file-preview-btn');
        const fileSaveBtn    = document.getElementById('file-save-btn');
-        const fileSavedMsg   = document.getElementById('file-saved-msg');
        const fileCloseBtn   = document.getElementById('file-close-btn');
        const filesBtn       = document.getElementById('files-btn');

-        let fileMode = 'preview'; // 'edit' or 'preview'
+        let fileMode        = 'preview'; // 'edit' or 'preview'
+        let activeFileName  = null;
+
+        // File groups — controls sidebar order and section labels
+        const FILE_GROUPS = [
+            { label: 'Identity', files: ['IDENTITY.md', 'SOUL.md', 'PROTOCOLS.md', 'CONTEXT_TIERS.md'] },
+            { label: 'Memory',   files: ['MEMORY_LONG.md', 'MEMORY_MID.md', 'MEMORY_SHORT.md'] },
+            { label: 'Profile',  files: ['USER.md', 'HELP.md'] },
+        ];
+
+        function fmtSize(bytes) {
+            if (!bytes) return 'empty';
+            if (bytes < 1024) return bytes + ' B';
+            return (bytes / 1024).toFixed(1) + ' KB';
+        }
+
+        function fmtModified(ts) {
+            if (!ts) return '';
+            const d   = new Date(ts * 1000);
+            const now = new Date();
+            if (d.toDateString() === now.toDateString()) return 'today';
+            const diff = (now - d) / 86400000;
+            if (diff < 2) return 'yesterday';
+            return d.toLocaleDateString(undefined, { month: 'short', day: 'numeric' });
+        }
+
+        function renderFileSidebar(files) {
+            const byName = Object.fromEntries(files.map(f => [f.name, f]));
+            fileSidebar.innerHTML = '';
+
+            for (const group of FILE_GROUPS) {
+                const groupEl = document.createElement('div');
+                groupEl.className = 'file-group';
+
+                const header = document.createElement('div');
+                header.className = 'fg-header';
+                header.textContent = group.label;
+                header.addEventListener('click', () => header.classList.toggle('collapsed'));
+                groupEl.appendChild(header);
+
+                const items = document.createElement('div');
+                items.className = 'fg-items';
+
+                for (const fname of group.files) {
+                    const f = byName[fname];
+                    if (!f) continue;
+
+                    const item = document.createElement('div');
+                    item.className = 'file-item' + (f.exists ? '' : ' missing');
+                    item.dataset.name = fname;
+                    if (fname === activeFileName) item.classList.add('active');
+
+                    const nameEl = document.createElement('div');
+                    nameEl.className = 'fi-name';
+                    nameEl.textContent = fname;
+                    item.appendChild(nameEl);
+
+                    const metaEl = document.createElement('div');
+                    metaEl.className = 'fi-meta';
+                    metaEl.innerHTML = `<span>${fmtSize(f.size)}</span>`
+                        + (f.modified ? `<span>${fmtModified(f.modified)}</span>` : '');
+                    item.appendChild(metaEl);
+
+                    item.addEventListener('click', () => loadFile(fname));
+                    items.appendChild(item);
+                }
+
+                groupEl.appendChild(items);
+                fileSidebar.appendChild(groupEl);
+            }
+        }
+
+        function setActiveFile(name) {
+            activeFileName = name;
+            fileSidebar.querySelectorAll('.file-item').forEach(el => {
+                el.classList.toggle('active', el.dataset.name === name);
+            });
+            document.getElementById('file-modal-title').textContent = name;
+        }

        function setFileMode(mode) {
            fileMode = mode;
@@ -1023,27 +1243,22 @@
        }

        async function loadFile(name) {
+            setActiveFile(name);
            const res = await fetch(`/files/${encodeURIComponent(name)}?${_fileParams}`);
            if (!res.ok) { fileEditor.value = `Error loading ${name}`; return; }
            const data = await res.json();
            fileEditor.value = data.content;
-            document.getElementById('file-modal-title').textContent = name;
            setFileMode(fileMode);
        }

        async function openFileModal() {
-            // Populate the file list
-            const res = await fetch(`/files?${_fileParams}`);
+            const res  = await fetch(`/files?${_fileParams}`);
            const data = await res.json();
-            fileSelect.innerHTML = '';
-            for (const f of data.files) {
-                const opt = document.createElement('option');
-                opt.value = f.name;
-                opt.textContent = f.name + (f.exists ? '' : ' (missing)');
-                fileSelect.appendChild(opt);
-            }
+            renderFileSidebar(data.files);
            fileModal.classList.add('open');
-            await loadFile(fileSelect.value);
+            // Load first existing file
+            const first = data.files.find(f => f.exists) || data.files[0];
+            if (first) await loadFile(first.name);
        }

        filesBtn.addEventListener('click', () => {
@@ -1051,21 +1266,24 @@
            openFileModal();
        });

-        fileSelect.addEventListener('change', () => loadFile(fileSelect.value));
-
        fileRawBtn.addEventListener('click', () => setFileMode('edit'));
        filePreviewBtn.addEventListener('click', () => setFileMode('preview'));

        fileSaveBtn.addEventListener('click', async () => {
-            const name = fileSelect.value;
-            const res = await fetch(`/files/${encodeURIComponent(name)}?${_fileParams}`, {
+            if (!activeFileName) return;
+            const res = await fetch(`/files/${encodeURIComponent(activeFileName)}?${_fileParams}`, {
                method: 'PUT',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ content: fileEditor.value }),
            });
            if (res.ok) {
-                fileSavedMsg.classList.add('show');
-                setTimeout(() => fileSavedMsg.classList.remove('show'), 2000);
+                showToast('File saved', 'success');
+                // Refresh sidebar to update size/modified
+                const listRes = await fetch(`/files?${_fileParams}`);
+                const listData = await listRes.json();
+                renderFileSidebar(listData.files);
+            } else {
+                showToast('Save failed', 'error');
            }
        });

@@ -1075,6 +1293,66 @@
            if (e.target === fileModal) fileModal.classList.remove('open');
        });

+        // ── Session search ────────────────────────────────────────────
+        const sessionSearchInput   = document.getElementById('session-search-input');
+        const sessionSearchBtn     = document.getElementById('session-search-btn');
+        const sessionSearchResults = document.getElementById('session-search-results');
+
+        function _showFileView() {
+            fileEditor.style.display = '';
+            filePreview.style.display = '';
+            sessionSearchResults.style.display = 'none';
+        }
+
+        function _showSearchResults(html) {
+            fileEditor.style.display = 'none';
+            filePreview.style.display = 'none';
+            sessionSearchResults.style.display = '';
+            sessionSearchResults.innerHTML = html;
+        }
+
+        async function runSessionSearch() {
+            const q = sessionSearchInput.value.trim();
+            if (q.length < 2) return;
+            sessionSearchBtn.disabled = true;
+            sessionSearchBtn.textContent = '…';
+            try {
+                const res  = await fetch(`/sessions/search?q=${encodeURIComponent(q)}&${_fileParams}&limit=30`);
+                const data = await res.json();
+                if (!res.ok) { _showSearchResults(`<p class="sr-error">Error: ${data.detail || res.status}</p>`); return; }
+                if (!data.matches.length) {
+                    _showSearchResults(`<p class="sr-empty">No results for "<strong>${_esc(q)}</strong>" in ${data.total_files_searched} session file(s).</p>`);
+                    return;
+                }
+                let html = `<div class="sr-header">${data.matches.length} result(s) for "<strong>${_esc(q)}</strong>" across ${data.total_files_searched} session(s)</div>`;
+                let lastDate = null;
+                for (const m of data.matches) {
+                    if (m.date !== lastDate) {
+                        html += `<div class="sr-date">${m.date}</div>`;
+                        lastDate = m.date;
+                    }
+                    const hi = m.excerpt.replace(new RegExp(_esc(q), 'gi'), s => `<mark>${_esc(s)}</mark>`);
+                    html += `<div class="sr-excerpt">${hi}</div>`;
+                }
+                _showSearchResults(html);
+            } catch (e) {
+                _showSearchResults(`<p class="sr-error">Search failed: ${e.message}</p>`);
+            } finally {
+                sessionSearchBtn.disabled = false;
+                sessionSearchBtn.textContent = 'Go';
+            }
+        }
+
+        sessionSearchBtn.addEventListener('click', runSessionSearch);
+        sessionSearchInput.addEventListener('keydown', (e) => {
+            if (e.key === 'Enter') runSessionSearch();
+        });
+
+        // When a file is clicked, switch back from search results to editor
+        fileSidebar.addEventListener('click', () => {
+            if (sessionSearchResults.style.display !== 'none') _showFileView();
+        });
+
        document.addEventListener('keydown', (e) => {
            if (e.key === 'Escape') {
                if (fileModal.classList.contains('open')) fileModal.classList.remove('open');
--- a/cortex/static/index.html
+++ b/cortex/static/index.html
@@ -21,6 +21,8 @@
    </script>
    <link rel="stylesheet" href="/static/style.css">
    <script src="/static/marked.min.js"></script>
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.11.1/styles/atom-one-dark.min.css">
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.11.1/highlight.min.js"></script>
    <script src="https://unpkg.com/lucide@latest/dist/umd/lucide.min.js"></script>
 </head>
 <body>
@@ -101,6 +103,7 @@
                <div class="ctx-row">
                    <button id="backend-toggle" class="ctx-btn" title="Click to switch primary backend">claude</button>
                </div>
+                <div id="backend-model-hint"></div>
            </div>
            <div class="ctx-section">
                <div class="ctx-section-title">Display</div>
@@ -123,16 +126,28 @@
        <div id="file-modal-inner">
            <div id="file-modal-header">
                <span id="file-modal-title">Context Files</span>
-                <select id="file-select"></select>
+                <span class="fm-spacer"></span>
                <button class="fm-btn" id="file-raw-btn">edit</button>
                <button class="fm-btn active" id="file-preview-btn">preview</button>
                <button class="fm-btn save" id="file-save-btn">Save</button>
-                <span id="file-saved-msg">saved ✓</span>
                <button class="fm-btn" id="file-close-btn">✕</button>
            </div>
-            <div id="file-modal-body">
-                <textarea id="file-editor" spellcheck="false"></textarea>
-                <div id="file-preview"></div>
+            <div id="file-modal-content">
+                <div id="file-sidebar-wrap">
+                    <div id="file-sidebar"></div>
+                    <div id="session-search-wrap">
+                        <div id="session-search-label">Session Search</div>
+                        <div id="session-search-row">
+                            <input id="session-search-input" type="search" placeholder="Search sessions…" autocomplete="off">
+                            <button id="session-search-btn">Go</button>
+                        </div>
+                    </div>
+                </div>
+                <div id="file-modal-body">
+                    <textarea id="file-editor" spellcheck="false"></textarea>
+                    <div id="file-preview"></div>
+                    <div id="session-search-results" style="display:none"></div>
+                </div>
            </div>
        </div>
    </div>
@@ -169,6 +184,8 @@
        </div>
    </div>

+    <div id="sessions-backdrop"></div>
+    <div id="toast-container"></div>
    <script src="/static/app.js"></script>
 </body>
 </html>
--- a/cortex/static/local_llm.html
+++ b/cortex/static/local_llm.html
@@ -0,0 +1,307 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Cortex — Local Models</title>
+  <link rel="preconnect" href="https://fonts.googleapis.com">
+  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@100..900&display=swap" rel="stylesheet">
+  <style>
+    *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+
+    body {
+      min-height: 100vh;
+      background: #0f1117;
+      font-family: 'Inter', system-ui, -apple-system, sans-serif;
+      font-weight: 450;
+      -webkit-font-smoothing: antialiased;
+      color: #e2e8f0;
+      padding: 2rem 1.5rem 4rem;
+    }
+
+    .page { max-width: 640px; margin: 0 auto; }
+
+    /* ── Nav ── */
+    .page-nav {
+      display: flex; align-items: center; gap: 0.25rem;
+      margin-bottom: 1.75rem; flex-wrap: wrap;
+    }
+    .nav-link {
+      display: inline-flex; align-items: center;
+      padding: 0.3rem 0.6rem; border-radius: 6px;
+      font-size: 0.8rem; font-weight: 500; color: #64748b;
+      text-decoration: none; transition: color 0.15s, background 0.15s;
+      white-space: nowrap;
+    }
+    .nav-link:hover { color: #cbd5e1; background: rgba(255,255,255,0.05); }
+    .nav-link.active { color: #a78bfa; }
+    .nav-spacer { flex: 1; min-width: 0.5rem; }
+    .nav-link.nav-logout { color: #475569; }
+    .nav-link.nav-logout:hover { color: #94a3b8; background: none; }
+
+    /* ── Page header ── */
+    .page-header { margin-bottom: 2rem; padding-bottom: 1rem; border-bottom: 1px solid #2d3148; }
+    .page-header h1 { font-size: 1.4rem; font-weight: 700; color: #a78bfa; }
+    .page-header p  { font-size: 0.82rem; color: #94a3b8; margin-top: 0.25rem; }
+
+    /* ── Section cards ── */
+    .section {
+      background: #1a1d27; border: 1px solid #2d3148;
+      border-radius: 10px; padding: 1.5rem; margin-bottom: 1.25rem;
+    }
+    .section h2 {
+      font-size: 0.85rem; font-weight: 600; color: #94a3b8;
+      text-transform: uppercase; letter-spacing: 0.05em;
+      margin-bottom: 1.1rem; padding-bottom: 0.5rem;
+      border-bottom: 1px solid #2d3148;
+    }
+
+    /* ── Form elements ── */
+    .field { margin-bottom: 0.9rem; }
+    label {
+      display: block; font-size: 0.78rem; font-weight: 500;
+      color: #94a3b8; margin-bottom: 0.35rem;
+    }
+    input[type="text"], input[type="password"], input[type="url"], select {
+      width: 100%; padding: 0.6rem 0.8rem;
+      background: #0f1117; border: 1px solid #2d3148; border-radius: 6px;
+      color: #e2e8f0; font-size: 0.9rem; font-family: inherit;
+      outline: none; transition: border-color 0.15s;
+    }
+    input:focus, select:focus { border-color: #7c3aed; }
+    select { cursor: pointer; }
+
+    .field-row { display: flex; gap: 0.75rem; }
+    .field-row .field { flex: 1; margin-bottom: 0; }
+
+    .hint { font-size: 0.75rem; color: #94a3b8; margin-top: 0.35rem; }
+
+    /* ── Buttons ── */
+    .btn {
+      padding: 0.6rem 1.1rem; border: none; border-radius: 6px;
+      font-size: 0.88rem; font-weight: 600; cursor: pointer;
+      transition: background 0.15s, opacity 0.15s; font-family: inherit;
+    }
+    .btn-primary { background: #7c3aed; color: #fff; }
+    .btn-primary:hover { background: #6d28d9; }
+    .btn-secondary {
+      background: #1a1d27; color: #94a3b8;
+      border: 1px solid #2d3148;
+    }
+    .btn-secondary:hover { border-color: #94a3b8; color: #e2e8f0; }
+    .btn-sm { padding: 0.35rem 0.7rem; font-size: 0.8rem; font-weight: 500; }
+    .btn-row { display: flex; gap: 0.6rem; align-items: center; margin-top: 0.5rem; }
+
+    /* ── Model list ── */
+    .model-row {
+      display: flex; align-items: center; justify-content: space-between;
+      gap: 0.75rem; padding: 0.75rem 0.9rem;
+      background: #0f1117; border: 1px solid #2d3148; border-radius: 8px;
+      margin-bottom: 0.5rem;
+    }
+    .model-row.model-active { border-color: #7c3aed; background: #13102a; }
+    .model-info { display: flex; flex-direction: column; gap: 0.2rem; min-width: 0; }
+    .model-label { font-size: 0.9rem; font-weight: 600; color: #e2e8f0; }
+    .model-name  { font-size: 0.75rem; color: #64748b; font-family: monospace; word-break: break-all; }
+    .model-host  { font-size: 0.72rem; color: #475569; }
+    .active-badge {
+      display: inline-block; margin-left: 0.5rem;
+      padding: 0.1rem 0.45rem; border-radius: 3px;
+      background: #4c1d95; color: #c4b5fd;
+      font-size: 0.68rem; font-weight: 600; text-transform: uppercase;
+      vertical-align: middle;
+    }
+    .active-label { font-size: 0.8rem; color: #a78bfa; font-weight: 500; }
+    .model-actions { display: flex; gap: 0.4rem; flex-shrink: 0; }
+    .row-btn {
+      padding: 0.3rem 0.65rem; border-radius: 5px; font-size: 0.78rem;
+      font-weight: 500; cursor: pointer; font-family: inherit;
+      border: 1px solid #2d3148; background: #1a1d27; color: #94a3b8;
+      transition: border-color 0.15s, color 0.15s;
+    }
+    .row-btn:hover { border-color: #7c3aed; color: #a78bfa; }
+    .row-btn.danger { color: #f87171; border-color: #2d3148; }
+    .row-btn.danger:hover { border-color: #f87171; }
+    .empty-note { font-size: 0.85rem; color: #475569; padding: 0.5rem 0; }
+
+    /* ── Fetch models ── */
+    #fetch-status { font-size: 0.8rem; color: #94a3b8; margin-top: 0.5rem; min-height: 1.2rem; }
+    #fetch-status.ok    { color: #4ade80; }
+    #fetch-status.err   { color: #f87171; }
+    #model-select-wrap  { display: none; margin-top: 0.75rem; }
+
+    /* ── Messages ── */
+    .msg {
+      font-size: 0.85rem; text-align: center;
+      padding: 0.6rem 1rem; border-radius: 6px; margin-bottom: 1rem;
+    }
+    .msg.success { color: #4ade80; background: #052e16; border: 1px solid #166534; }
+    .msg.error   { color: #f87171; background: #2d0a0a; border: 1px solid #7f1d1d; }
+
+    /* ── Key hint ── */
+    .key-status { font-size: 0.75rem; color: #94a3b8; margin-top: 0.35rem; }
+  </style>
+</head>
+<body>
+  <div class="page">
+    <nav class="page-nav">
+      <a href="/" class="nav-link">← Chat</a>
+      <a href="/help" class="nav-link">Help</a>
+      <a href="/settings" class="nav-link">Settings</a>
+      <a href="/settings/local" class="nav-link active">Local Models</a>
+      <span class="nav-spacer"></span>
+      <a href="/logout" class="nav-link nav-logout">Sign out</a>
+    </nav>
+
+    <div class="page-header">
+      <h1>Local Models</h1>
+      <p>Configure your OpenAI-compatible host and models (Open WebUI, Ollama, LM Studio, etc.)</p>
+    </div>
+
+    <!-- SUCCESS -->
+    <!-- ERROR -->
+
+    <!-- ── Host ── -->
+    <div class="section">
+      <h2>Host</h2>
+      <p style="font-size:0.82rem; color:#94a3b8; margin-bottom:1rem; line-height:1.55;">
+        The API server that hosts your local models. Leave the key blank to keep the existing one.
+      </p>
+      <form method="POST" action="/settings/local/host">
+        <input type="hidden" name="host_id" value="{{ host_id }}">
+        <div class="field-row">
+          <div class="field">
+            <label for="host_label">Label</label>
+            <input type="text" id="host_label" name="label"
+                   value="{{ host_label }}" placeholder="e.g. Home ML Laptop"
+                   autocomplete="off" data-form-type="other">
+          </div>
+          <div class="field" style="flex:2">
+            <label for="host_url">API URL</label>
+            <input type="text" id="host_url" name="api_url"
+                   value="{{ host_url }}" placeholder="http://192.168.x.x:3000"
+                   autocomplete="off" spellcheck="false" data-form-type="other">
+          </div>
+        </div>
+        <div class="field">
+          <label for="host_key">API Key</label>
+          <input type="password" id="host_key" name="api_key"
+                 placeholder="{{ host_key_hint }}"
+                 autocomplete="new-password"
+                 data-1p-ignore data-lpignore="true" data-form-type="other">
+          <p class="key-status">Current: {{ host_key_hint }}</p>
+        </div>
+        <div class="btn-row">
+          <button type="submit" class="btn btn-primary btn-sm">Save Host</button>
+          <button type="button" id="fetch-btn" class="btn btn-secondary btn-sm"
+                  {{ has_host == 'false' and 'disabled title="Save a host first"' or '' }}>
+            Fetch models from host
+          </button>
+          <span id="fetch-status"></span>
+        </div>
+      </form>
+    </div>
+
+    <!-- ── Configured models ── -->
+    <div class="section">
+      <h2>Models</h2>
+      {{ model_rows }}
+    </div>
+
+    <!-- ── Add model ── -->
+    <div class="section" id="add-section"{{ add_section_hidden }}>
+      <h2>Add Model</h2>
+
+      <div id="model-select-wrap">
+        <div class="field">
+          <label for="model-picker">Available on host</label>
+          <select id="model-picker">
+            <option value="">— select a model —</option>
+          </select>
+        </div>
+      </div>
+
+      <form method="POST" action="/settings/local/models/add" id="add-form">
+        <input type="hidden" name="host_id" value="{{ first_host_id }}">
+        <div class="field-row">
+          <div class="field">
+            <label for="add-label">Label <span style="color:#475569; font-weight:400">(friendly name)</span></label>
+            <input type="text" id="add-label" name="label"
+                   placeholder="e.g. Qwen3 8B"
+                   autocomplete="off" data-form-type="other">
+          </div>
+          <div class="field" style="flex:2">
+            <label for="add-model-name">Model name</label>
+            <input type="text" id="add-model-name" name="model_name"
+                   placeholder="e.g. test-agent-simple"
+                   autocomplete="off" spellcheck="false" data-form-type="other">
+          </div>
+        </div>
+        <button type="submit" class="btn btn-primary btn-sm">Add Model</button>
+      </form>
+    </div>
+  </div>
+
+  <script>
+    const fetchBtn    = document.getElementById('fetch-btn');
+    const fetchStatus = document.getElementById('fetch-status');
+    const picker      = document.getElementById('model-picker');
+    const pickerWrap  = document.getElementById('model-select-wrap');
+    const labelInput  = document.getElementById('add-label');
+    const nameInput   = document.getElementById('add-model-name');
+
+    if (fetchBtn) {
+      fetchBtn.addEventListener('click', async () => {
+        fetchBtn.disabled = true;
+        fetchStatus.textContent  = 'Fetching…';
+        fetchStatus.className    = '';
+
+        try {
+          const res  = await fetch('/api/local-llm/fetch-models');
+          const data = await res.json();
+
+          if (data.error) {
+            fetchStatus.textContent = '✗ ' + data.error;
+            fetchStatus.className   = 'err';
+            return;
+          }
+
+          picker.innerHTML = '<option value="">— select a model —</option>';
+          for (const m of data.models) {
+            const opt   = document.createElement('option');
+            opt.value       = m.id;
+            opt.textContent = m.name !== m.id ? `${m.name}  (${m.id})` : m.id;
+            opt.dataset.id  = m.id;
+            opt.dataset.name = m.name;
+            picker.appendChild(opt);
+          }
+
+          pickerWrap.style.display = 'block';
+          fetchStatus.textContent  = `✓ ${data.models.length} model${data.models.length !== 1 ? 's' : ''} found`;
+          fetchStatus.className    = 'ok';
+        } catch (e) {
+          fetchStatus.textContent = '✗ ' + e.message;
+          fetchStatus.className   = 'err';
+        } finally {
+          fetchBtn.disabled = false;
+        }
+      });
+    }
+
+    // Auto-fill label + model name when a model is selected from the picker
+    picker.addEventListener('change', () => {
+      const opt = picker.options[picker.selectedIndex];
+      if (!opt.value) return;
+      nameInput.value  = opt.dataset.id  || opt.value;
+      // Only pre-fill label if it looks different from the model id
+      if (opt.dataset.name && opt.dataset.name !== opt.dataset.id) {
+        labelInput.value = opt.dataset.name;
+      } else {
+        labelInput.value = '';
+      }
+      nameInput.focus();
+    });
+  </script>
+</body>
+</html>
--- a/cortex/static/settings.html
+++ b/cortex/static/settings.html
@@ -241,7 +241,8 @@
          <label for="new_username">New username</label>
          <input type="text" id="new_username" name="new_username"
                 value="{{ username }}"
-                 pattern="[a-z_][a-z0-9_\-]{0,31}" required autofocus>
+                 pattern="[a-z_][a-z0-9_\-]{0,31}" required autofocus
+                 autocomplete="off" data-form-type="other">
          <p style="font-size:0.75rem; color:#94a3b8; margin-top:0.3rem;">
            Lowercase letters, digits, _ or - only. You will be logged out after renaming.
          </p>
@@ -281,8 +282,9 @@
        <div class="field">
          <label for="gemini_api_key">API Key</label>
          <input type="text" id="gemini_api_key" name="gemini_api_key"
-                 placeholder="{{ gemini_key_hint }}" autocomplete="off"
-                 spellcheck="false" data-1p-ignore data-lpignore="true">
+                 placeholder="{{ gemini_key_hint }}"
+                 autocomplete="new-password" spellcheck="false"
+                 data-1p-ignore data-lpignore="true" data-form-type="other">
        </div>
        <button type="submit">Save Key</button>
      </form>
@@ -294,6 +296,20 @@
      </p>
    </div>

+    <!-- Local models link -->
+    <div class="section">
+      <h2>Local Models</h2>
+      <p style="font-size:0.8rem; color:#94a3b8; margin-bottom:0.85rem; line-height:1.55;">
+        Configure OpenAI-compatible hosts and models (Open WebUI, Ollama, LM Studio, etc.).
+      </p>
+      <a href="/settings/local"
+         style="display:inline-block; padding:0.55rem 1rem; background:#7c3aed; border-radius:6px;
+                color:#fff; font-size:0.88rem; font-weight:600; text-decoration:none;
+                transition:background 0.15s;">
+        Manage local models →
+      </a>
+    </div>
+
    <!-- Change password -->
    <div class="section">
      <h2>Change Password</h2>
--- a/cortex/static/style.css
+++ b/cortex/static/style.css
@@ -431,6 +431,8 @@
            padding: 0;
            font-size: 0.85em;
        }
+        /* Syntax highlighting — app theme controls the pre background; hljs adds token colors */
+        .message.assistant pre code.hljs { background: transparent; padding: 0; }

        .message.system {
            align-self: center;
@@ -440,6 +442,80 @@
            padding: 2px 0;
        }

+        /* ── Tool call step cards (agent mode) ── */
+        .tool-calls-container {
+            display: flex;
+            flex-direction: column;
+            gap: 3px;
+            margin: 4px 0 6px;
+            align-self: stretch;
+        }
+        .tool-call {
+            background: var(--surface);
+            border: 1px solid var(--border);
+            border-radius: 6px;
+            overflow: hidden;
+            font-size: 0.78rem;
+        }
+        .tool-call summary {
+            display: flex;
+            align-items: baseline;
+            gap: 0.5rem;
+            padding: 0.35rem 0.65rem;
+            cursor: pointer;
+            list-style: none;
+            user-select: none;
+            color: var(--muted);
+        }
+        .tool-call summary::-webkit-details-marker { display: none; }
+        .tool-call summary::before {
+            content: '▶';
+            font-size: 0.55rem;
+            color: var(--muted);
+            transition: transform 0.12s;
+            flex-shrink: 0;
+        }
+        .tool-call[open] summary::before { transform: rotate(90deg); }
+        .tool-call summary:hover { color: var(--text); background: rgba(255,255,255,0.03); }
+        .tc-name {
+            font-weight: 600;
+            color: var(--accent);
+            font-family: 'Courier New', monospace;
+        }
+        .tc-snippet {
+            color: var(--muted);
+            overflow: hidden;
+            text-overflow: ellipsis;
+            white-space: nowrap;
+            max-width: 36ch;
+        }
+        .tc-body {
+            padding: 0 0.65rem 0.5rem;
+            display: flex;
+            flex-direction: column;
+            gap: 0.4rem;
+        }
+        .tc-section { display: flex; flex-direction: column; gap: 2px; }
+        .tc-label {
+            font-size: 0.68rem;
+            font-weight: 600;
+            text-transform: uppercase;
+            letter-spacing: 0.05em;
+            color: var(--muted);
+        }
+        .tc-body pre {
+            margin: 0;
+            background: var(--pre-bg);
+            border: 1px solid var(--border);
+            border-radius: 4px;
+            padding: 6px 8px;
+            font-size: 0.78rem;
+            white-space: pre-wrap;
+            word-break: break-word;
+            color: var(--text);
+            overflow-x: auto;
+        }
+
        .message.error {
            align-self: flex-start;
            background: var(--error-bg);
@@ -451,7 +527,7 @@
        .message.thinking { color: var(--muted); font-style: italic; }

        /* Copy button */
-        .message.assistant { position: relative; }
+        .message.assistant, .message.user { position: relative; }

        .copy-btn {
            display: inline-flex;
@@ -471,7 +547,8 @@
            transition: opacity 0.15s, color 0.15s, border-color 0.15s;
        }

-        .message.assistant:hover .copy-btn { opacity: 1; }
+        .message.assistant:hover .copy-btn,
+        .message.user:hover .copy-btn { opacity: 1; }
        .copy-btn:hover  { color: var(--text); border-color: var(--muted); }
        .copy-btn.copied { color: var(--success); border-color: var(--success-dim); }

@@ -807,22 +884,12 @@
            flex-shrink: 0;
        }

-        #file-modal-header select {
-            background: var(--surface);
-            border: 1px solid var(--border);
-            border-radius: 5px;
-            color: var(--text);
-            font-size: 0.85rem;
-            padding: 4px 8px;
-            cursor: pointer;
-        }
-
        #file-modal-title {
            font-size: 0.9rem;
            font-weight: 600;
            color: var(--accent);
-            flex: 1;
        }
+        .fm-spacer { flex: 1; }

        .fm-btn {
            background: var(--bg);
@@ -838,13 +905,153 @@
        .fm-btn.active { color: var(--accent); border-color: var(--accent); }
        .fm-btn.save   { color: var(--accent); border-color: var(--inara-border); }
        .fm-btn.save:hover { background: var(--inara-bg); }
-        #file-saved-msg {
-            font-size: 0.75rem;
-            color: #6abf6a;
-            opacity: 0;
-            transition: opacity 0.3s;
+        #file-modal-content {
+            flex: 1;
+            display: flex;
+            overflow: hidden;
+        }
+
+        /* ── File sidebar ── */
+        #file-sidebar-wrap {
+            width: 190px;
+            flex-shrink: 0;
+            border-right: 1px solid var(--border);
+            display: flex;
+            flex-direction: column;
+            background: var(--bg);
+        }
+        #file-sidebar {
+            flex: 1;
+            overflow-y: auto;
+        }
+
+        /* ── Session search (within sidebar) ── */
+        #session-search-wrap {
+            border-top: 1px solid var(--border);
+            padding: 8px 8px 10px;
+        }
+        #session-search-label {
+            font-size: 0.65rem;
+            font-weight: 700;
+            text-transform: uppercase;
+            letter-spacing: 0.06em;
+            color: var(--muted);
+            margin-bottom: 5px;
+        }
+        #session-search-row {
+            display: flex;
+            gap: 4px;
+        }
+        #session-search-input {
+            flex: 1;
+            min-width: 0;
+            background: var(--surface);
+            border: 1px solid var(--border);
+            border-radius: 4px;
+            color: var(--text);
+            font-size: 0.78rem;
+            padding: 3px 6px;
+        }
+        #session-search-btn {
+            background: var(--surface);
+            border: 1px solid var(--border);
+            border-radius: 4px;
+            color: var(--muted);
+            font-size: 0.78rem;
+            padding: 3px 8px;
+            cursor: pointer;
+        }
+        #session-search-btn:hover { color: var(--accent); border-color: var(--accent); }
+
+        /* ── Session search results panel ── */
+        #session-search-results {
+            flex: 1;
+            overflow-y: auto;
+            padding: 12px 14px;
+            font-size: 0.82rem;
+        }
+        .sr-header { color: var(--muted); font-size: 0.72rem; margin-bottom: 10px; }
+        .sr-date {
+            font-size: 0.7rem;
+            font-weight: 700;
+            text-transform: uppercase;
+            letter-spacing: 0.05em;
+            color: var(--accent);
+            margin: 14px 0 4px;
+        }
+        .sr-date:first-of-type { margin-top: 0; }
+        .sr-excerpt {
+            background: var(--surface);
+            border-left: 2px solid var(--border);
+            border-radius: 0 4px 4px 0;
+            padding: 6px 10px;
+            margin-bottom: 6px;
+            line-height: 1.5;
+            white-space: pre-wrap;
+            word-break: break-word;
+            color: var(--text);
+        }
+        .sr-excerpt mark {
+            background: rgba(139,92,246,0.25);
+            color: var(--accent);
+            border-radius: 2px;
+            padding: 0 1px;
+        }
+        .sr-empty, .sr-error { color: var(--muted); padding: 8px 0; }
+
+        .fg-header {
+            display: flex;
+            align-items: center;
+            gap: 0.3rem;
+            padding: 7px 10px 5px;
+            font-size: 0.68rem;
+            font-weight: 700;
+            text-transform: uppercase;
+            letter-spacing: 0.06em;
+            color: var(--muted);
+            cursor: pointer;
+            user-select: none;
+        }
+        .fg-header::before {
+            content: '▾';
+            font-size: 0.7rem;
+            transition: transform 0.15s;
+        }
+        .fg-header.collapsed::before { transform: rotate(-90deg); }
+        .fg-header.collapsed + .fg-items { display: none; }
+
+        .fg-items { display: flex; flex-direction: column; }
+
+        .file-item {
+            padding: 6px 10px 6px 16px;
+            cursor: pointer;
+            border-left: 2px solid transparent;
+            transition: background 0.1s, border-color 0.1s;
+        }
+        .file-item:hover { background: var(--surface); }
+        .file-item.active {
+            background: var(--inara-bg);
+            border-left-color: var(--accent);
+        }
+        .file-item.missing { opacity: 0.45; }
+
+        .fi-name {
+            font-size: 0.8rem;
+            color: var(--text);
+            font-weight: 500;
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+        .file-item.active .fi-name { color: var(--accent); }
+
+        .fi-meta {
+            display: flex;
+            gap: 0.5rem;
+            margin-top: 2px;
+            font-size: 0.68rem;
+            color: var(--muted);
        }
-        #file-saved-msg.show { opacity: 1; }

        #file-modal-body {
            flex: 1;
@@ -935,9 +1142,14 @@
            cursor: pointer;
            transition: color 0.15s, border-color 0.15s, background 0.15s;
        }
-        .ctx-btn:hover  { color: var(--text); border-color: var(--muted); }
-        .ctx-btn.active { color: var(--accent); border-color: var(--accent); }
-        .ctx-btn.mem-on { color: var(--success); border-color: var(--success-dim); }
+        .ctx-btn:hover    { color: var(--text); border-color: var(--muted); }
+        .ctx-btn.active   { color: var(--accent); border-color: var(--accent); }
+        .ctx-btn.mem-on   { color: var(--success); border-color: var(--success-dim); }
+        .ctx-btn.local-on { color: #f59e0b; border-color: #92400e; }
+        #backend-model-hint {
+            font-size: 0.68rem; color: #f59e0b; opacity: 0.8;
+            margin-top: 4px; word-break: break-all; line-height: 1.3;
+        }

        #ctx-distill-status {
            margin-top: 6px;
@@ -1173,6 +1385,48 @@

        #auth-banner-close:hover { opacity: 1; }

+        /* ── Toasts ──────────────────────────────────────────────── */
+        #toast-container {
+            position: fixed;
+            bottom: 1.25rem;
+            right: 1.25rem;
+            display: flex;
+            flex-direction: column;
+            align-items: flex-end;
+            gap: 0.4rem;
+            z-index: 9999;
+            pointer-events: none;
+        }
+        .toast {
+            padding: 0.45rem 0.85rem;
+            border-radius: 6px;
+            font-size: 0.8rem;
+            font-weight: 500;
+            color: #fff;
+            background: #334155;
+            border: 1px solid #475569;
+            box-shadow: 0 4px 12px rgba(0,0,0,0.35);
+            opacity: 0;
+            transform: translateY(6px);
+            transition: opacity 0.18s ease, transform 0.18s ease;
+            pointer-events: none;
+            white-space: nowrap;
+        }
+        .toast.show { opacity: 1; transform: translateY(0); }
+        .toast.success { background: #14532d; border-color: #16a34a; }
+        .toast.error   { background: #7f1d1d; border-color: #dc2626; }
+
+        /* Sessions backdrop — hidden by default, visible only as mobile drawer overlay */
+        #sessions-backdrop {
+            display: none;
+            position: fixed;
+            inset: 0;
+            background: rgba(0, 0, 0, 0.5);
+            z-index: 98;
+            animation: backdrop-in 0.2s ease;
+        }
+        @keyframes backdrop-in { from { opacity: 0; } to { opacity: 1; } }
+
        /* ── Mobile responsive ───────────────────────────────────── */
        @media (max-width: 520px) {
            header { padding: 8px 12px; gap: 8px; }
@@ -1233,6 +1487,36 @@

            /* Larger touch targets */
            #send, #stop { padding: 12px 14px; font-size: 1rem; }
+
+            /* File modal: sidebar collapses to a narrow strip */
+            #file-modal-inner { width: 100vw; height: 100dvh; border-radius: 0; }
+            #file-sidebar-wrap { width: 130px; }
+            .fi-meta { display: none; }
+
+            /* Sessions backdrop active on mobile */
+            #sessions-backdrop.open { display: block; }
+
+            /* Sessions panel → full-height drawer sliding in from the right */
+            #sessions-panel {
+                display: block !important; /* keep rendered so transition works */
+                position: fixed;
+                top: 0;
+                right: 0;
+                bottom: 0;
+                width: min(300px, 85vw);
+                max-height: none;
+                height: 100%;
+                border-radius: 0;
+                border-top: none;
+                border-right: none;
+                border-bottom: none;
+                border-left: 1px solid var(--border);
+                transform: translateX(110%);
+                transition: transform 0.25s ease;
+                z-index: 99;
+                overflow-y: auto;
+            }
+            #sessions-panel.open { transform: translateX(0); }
        }

        /* ── Touch devices — no hover capability ─────────────────── */
--- a/cortex/user_settings.py
+++ b/cortex/user_settings.py
@@ -0,0 +1,194 @@
+"""
+Per-user settings stored in home/{user}/local_llm.json.
+
+Structure:
+  {
+    "hosts": [{"id", "label", "api_url", "api_key"}, ...],
+    "models": [{"id", "host_id", "label", "model_name"}, ...],
+    "active_model_id": "<model id>" | null
+  }
+
+Values not configured here fall back to .env server defaults.
+"""
+import json
+import logging
+import secrets
+from pathlib import Path
+
+from config import settings as app_settings
+
+logger = logging.getLogger(__name__)
+
+
+def _llm_path(username: str) -> Path:  # <home root>/<username>/local_llm.json
+    return app_settings.home_root().joinpath(username, "local_llm.json")
+
+
+def _empty() -> dict:  # blank config; built on every call so the lists are never shared
+    return dict(hosts=[], models=[], active_model_id=None)
+
+
+def _load(username: str) -> dict:
+    """Load the user's config; a missing, corrupt or malformed file yields a fresh one."""
+    path = _llm_path(username)
+    if not path.exists():
+        return _empty()
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+        if not isinstance(data, dict):  # e.g. a bare JSON list, string or null
+            raise ValueError("top-level JSON value is not an object")
+    except (ValueError, OSError):  # ValueError also covers JSON and UTF-8 decode errors
+        logger.warning("local_llm.json for %s is unreadable — starting fresh", username)
+        return _empty()
+    if "hosts" not in data:  # old single-model format {api_url, api_key, model}
+        return _migrate_v0(data)
+    return {**_empty(), **data}  # backfill top-level keys a hand-edited file may lack
+
+
+def _migrate_v0(old: dict) -> dict:
+    """Migrate flat {api_url, api_key, model} → hosts/models structure.
+
+    Each missing or empty field falls back to the matching .env server default.
+    Returns an empty config when no API URL is available from either source;
+    otherwise one host, plus one model (made active) when a model name is known.
+
+    The IDs are fixed rather than random: nothing here writes the result back,
+    so this runs again on every load until some other call saves the config,
+    and IDs already handed to a client must still match on its next request.
+    """
+    api_url = old.get("api_url") or app_settings.local_api_url
+    api_key = old.get("api_key") or app_settings.local_api_key
+    model_name = old.get("model") or app_settings.local_model
+    data = _empty()
+    if not api_url:
+        return data
+
+    host_id, model_id = "00000000", "00000001"  # same 8-hex shape as token_hex(4)
+    data["hosts"].append(
+        {"id": host_id, "label": "Local Model Server", "api_url": api_url, "api_key": api_key}
+    )
+    if model_name:
+        data["models"].append(
+            {"id": model_id, "host_id": host_id, "label": model_name, "model_name": model_name}
+        )
+        data["active_model_id"] = model_id
+
+    logger.info("migrated local_llm.json v0 → v1 for user (host=%s)", host_id)
+    return data
+
+
+def _save(username: str, data: dict) -> None:  # rewrites the user's whole config file
+    _llm_path(username).write_text(json.dumps(data, indent=2))
+
+
+# ── Public read API ───────────────────────────────────────────────────────────
+
+def get_config(username: str) -> dict:
+    """Return the user's whole local LLM config: hosts, models and the active model ID."""
+    return _load(username)
+
+
+def get_active_local_model(username: str) -> dict | None:
+    """Resolve the connection details of the user's currently active model.
+
+    The result is a dict with keys api_url, api_key, model_name and label,
+    taken from the first source that applies:
+      1. The user's active model together with the host it points at
+      2. The .env server defaults (LOCAL_API_URL / LOCAL_API_KEY / LOCAL_MODEL),
+         used only when both the URL and the model name are set
+      3. Neither: None is returned and the caller should raise a helpful error
+    """
+    config = _load(username)
+    wanted = config.get("active_model_id")
+
+    for entry in config["models"]:
+        if entry["id"] != wanted:
+            continue
+        # Only the first model with the active ID counts; no host → .env defaults.
+        for server in config["hosts"]:
+            if server["id"] == entry["host_id"]:
+                name = entry["model_name"]
+                return {
+                    "api_url":    server.get("api_url", ""),
+                    "api_key":    server.get("api_key", ""),
+                    "model_name": name,
+                    "label":      entry.get("label") or name,
+                }
+        break
+
+    env = app_settings
+    if not (env.local_api_url and env.local_model):
+        return None
+    return dict(api_url=env.local_api_url, api_key=env.local_api_key,
+                model_name=env.local_model, label=env.local_model)
+
+
+# ── Host management ───────────────────────────────────────────────────────────
+
+def save_host(username: str, host_id: str | None,
+              label: str, api_url: str, api_key: str) -> str:
+    """Update the host with the given ID, or create a new one. Returns the host ID.
+
+    A new host with a freshly generated ID is created when host_id is empty or
+    matches no stored host. On update, the stored api_key is replaced only by
+    a non-blank value, so a form that shows a masked placeholder and submits
+    an empty key field leaves the existing key in place.
+    """
+    data = _load(username)
+    new_label, new_url, new_key = label.strip(), api_url.strip(), api_key.strip()
+
+    existing = None
+    if host_id:
+        existing = next((h for h in data["hosts"] if h["id"] == host_id), None)
+
+    if existing is None:
+        # No ID given, or an ID that matches nothing: register a new host.
+        host_id = secrets.token_hex(4)
+        data["hosts"].append({
+            "id":      host_id,
+            "label":   new_label,
+            "api_url": new_url,
+            "api_key": new_key,
+        })
+    else:
+        existing.update(label=new_label, api_url=new_url)
+        if new_key:
+            existing["api_key"] = new_key
+    _save(username, data)
+    return host_id
+
+
+# ── Model management ──────────────────────────────────────────────────────────
+
+def add_model(username: str, host_id: str, label: str, model_name: str) -> str:
+    """Append a model entry that references host_id and return its generated ID.
+
+    A blank label defaults to the model name. The new model also becomes the
+    active one whenever no active model is set (e.g. it is the first model).
+    """
+    data = _load(username)
+    model_id, name = secrets.token_hex(4), model_name.strip()
+    data["models"].append(
+        {"id": model_id, "host_id": host_id, "label": label.strip() or name, "model_name": name}
+    )
+    data["active_model_id"] = data.get("active_model_id") or model_id
+    _save(username, data)
+    return model_id
+
+
+def remove_model(username: str, model_id: str) -> None:  # an unknown ID removes nothing
+    data = _load(username)
+    kept = data["models"] = [m for m in data["models"] if m["id"] != model_id]
+    if data.get("active_model_id") == model_id:  # the active model was the one removed:
+        data["active_model_id"] = kept[0]["id"] if kept else None  # promote the first one left
+    _save(username, data)
+
+
+def set_active_model(username: str, model_id: str) -> bool:
+    """Activate model_id and save. Returns False, changing nothing, when no model has that ID."""
+    data = _load(username)
+    found = any(m["id"] == model_id for m in data["models"])
+    if found:
+        data["active_model_id"] = model_id
+        _save(username, data)
+    return found