feat: local LLM multi-model, session search, cron proactive types, notifications, docs overhaul

Local LLM: - user_settings.py: per-user hosts/models config (local_llm.json) - routers/local_llm.py + static/local_llm.html: dedicated settings page - llm_client.py: local OpenAI-compatible backend via httpx - config.py: LOCAL_API_URL/KEY/MODEL + per-backend timeouts - Active model shown near backend toggle (amber hint text) Memory distillation: - memory_distiller.py: DISTILL_BACKEND_MID/LONG .env overrides - scheduler.py + notification.py: notify NC Talk after mid/long distill - notification.py: outbound channel abstraction (NC Talk, extensible) Session search: - routers/files.py: GET /sessions/search?q= with excerpts grouped by date - static/index.html + app.js: search UI in file sidebar with highlight - _esc() helper to prevent XSS in search results Proactive cron: - cron_runner.py: new job types — message (send directly) and brief (LLM + send) - Both support optional per-job channel override Channels: - routers/nextcloud_talk.py: consolidated using notification._send_nct_message() - routers/auth.py: local backend status in /auth/status - routers/chat.py: /backend returns {primary, fallback, local_model} object UI / UX: - Copy button for user messages (matching assistant) - Autocomplete disabled on sensitive form fields - settings.html: local model section replaced with link to /settings/local Docs overhaul: - MASTER.md hub + ARCH__SYSTEM/BACKENDS/PERSONA/CHANNELS/FUTURE.md - ARCH__Intelligence_Layer.md replaced with redirect table - CORTEX.md trimmed to vision only; README updated - OPEN_WEBUI_API.md added to docs/ Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 20:53:06 -04:00
parent bd6532e93a
commit a4daebdc9b
33 changed files with 2985 additions and 486 deletions
--- a/cortex/routers/auth.py
+++ b/cortex/routers/auth.py
@@ -13,6 +13,7 @@ import logging
 from datetime import datetime, timezone
 from pathlib import Path
 from fastapi import APIRouter
+from config import settings

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/auth")
@@ -71,9 +72,27 @@ def _gemini_status() -> dict:
        return {"ok": False, "error": str(e), "warning": True, "authenticated": False}


+async def _local_status() -> dict:
+    """Report whether the server-level local LLM endpoint is configured and reachable.
+
+    Returns {"configured": False} when `settings.local_api_url` is empty.
+    Otherwise issues GET <local_api_url>/api/models (5 s timeout) and returns
+    {"configured": True, "reachable": bool, "model": settings.local_model};
+    on any exception the dict also carries the exception text under "error".
+    """
+    if not settings.local_api_url:
+        return {"configured": False}
+    try:
+        # Imported inside the try, so a failing import is also reported as
+        # "not reachable" rather than raised.
+        import httpx
+        url = settings.local_api_url.rstrip("/") + "/api/models"
+        headers = {}
+        if settings.local_api_key:
+            headers["Authorization"] = f"Bearer {settings.local_api_key}"
+        async with httpx.AsyncClient(timeout=5) as client:
+            resp = await client.get(url, headers=headers)
+        # Any status below 400 counts as reachable.
+        reachable = resp.status_code < 400
+        return {"configured": True, "reachable": reachable, "model": settings.local_model}
+    except Exception as e:
+        return {"configured": True, "reachable": False, "error": str(e), "model": settings.local_model}
+
+
@router.get("/status")
 async def auth_status() -> dict:
+    """Return the status dicts of the claude, gemini and local backends, keyed by backend name."""
    return {
        "claude": _claude_status(),
        "gemini": _gemini_status(),
+        "local": await _local_status(),
    }
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -1,6 +1,7 @@
 import asyncio
 import json
-from fastapi import APIRouter, HTTPException, Query
+import jwt
+from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from context_loader import load_context
@@ -9,6 +10,8 @@ from session_logger import log_turn
 from session_store import load as load_session, save as save_session, list_all, generate_session_id, delete as delete_session, rename as rename_session
 from config import settings
 from persona import set_context, validate as validate_persona
+from auth_utils import COOKIE_NAME, decode_token
+import user_settings
 import event_bus


@@ -29,7 +32,7 @@ class ChatRequest(BaseModel):


 class BackendRequest(BaseModel):
+    """Request body accepted by `set_backend`."""
-    primary: str  # "claude" or "gemini"
+    primary: str  # "claude", "gemini", or "local"


 class NoteRequest(BaseModel):
@@ -130,19 +133,45 @@ async def chat(req: ChatRequest) -> StreamingResponse:
    )


+# Backend names that set_backend accepts as `primary`.
+_BACKEND_CYCLE = ("claude", "gemini", "local")
+# Fallback backend reported alongside each primary backend.
+_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
+
+
+def _local_model_info(request: Request) -> dict | None:
+    """Return the active local model for the session user, or None.
+
+    The user is taken from the session cookie (`COOKIE_NAME`) and the model
+    from `user_settings.get_active_local_model`.  The result is
+    {"label": ..., "model_name": ...}.
+
+    This is a best-effort lookup: it never raises.  None is returned when
+    there is no cookie, the token does not decode, the user has no active
+    model, or the lookup fails for any other reason (which is logged).
+    """
+    try:
+        token    = request.cookies.get(COOKIE_NAME)
+        username = decode_token(token) if token else None
+        if not username:
+            return None
+        cfg = user_settings.get_active_local_model(username)
+        if cfg:
+            return {"label": cfg["label"], "model_name": cfg["model_name"]}
+    except jwt.InvalidTokenError:
+        # Expected for a stale or tampered cookie: treat the caller as anonymous.
+        pass
+    except Exception:
+        # Anything else (unreadable settings, malformed model entry) must not
+        # break the backend endpoints, but should not vanish silently either.
+        import logging
+        logging.getLogger(__name__).exception("could not resolve active local model")
+    return None
+
+
@router.get("/backend")
-async def get_backend() -> dict:
-    other = "gemini" if settings.primary_backend == "claude" else "claude"
-    return {"primary": settings.primary_backend, "fallback": other}
+async def get_backend(request: Request) -> dict:
+    """Return the current primary backend, its fallback, and the caller's active local model.
+
+    `local_model` is whatever `_local_model_info` resolves from the request's
+    session cookie, or None when nothing can be resolved.
+    """
+    p = settings.primary_backend
+    return {
+        "primary":      p,
+        # A primary value missing from the table reports "claude" as fallback.
+        "fallback":     _BACKEND_FALLBACK.get(p, "claude"),
+        "local_model":  _local_model_info(request),
+    }


@router.post("/backend")
-async def set_backend(req: BackendRequest) -> dict:
-    if req.primary not in ("claude", "gemini"):
-        raise HTTPException(status_code=400, detail="primary must be 'claude' or 'gemini'")
+async def set_backend(req: BackendRequest, request: Request) -> dict:
+    """Switch the primary backend and return the same shape as get_backend.
+
+    Raises HTTPException(400) when `req.primary` is not one of the names in
+    `_BACKEND_CYCLE`.  On success `settings.primary_backend` is overwritten
+    on the shared settings object.
+    """
+    if req.primary not in _BACKEND_CYCLE:
+        raise HTTPException(status_code=400, detail="primary must be 'claude', 'gemini', or 'local'")
    settings.primary_backend = req.primary
-    other = "gemini" if req.primary == "claude" else "claude"
-    return {"primary": settings.primary_backend, "fallback": other}
+    return {
+        "primary":     req.primary,
+        # Safe to index directly: req.primary was validated above.
+        "fallback":    _BACKEND_FALLBACK[req.primary],
+        "local_model": _local_model_info(request),
+    }


 def _set_ctx(user: str, persona: str) -> None:
--- a/cortex/routers/files.py
+++ b/cortex/routers/files.py
@@ -1,7 +1,8 @@
 """
-Read/write the Inara identity markdown files.
+Read/write Inara identity markdown files, and search past session logs.
 Only whitelisted filenames are accessible — no path traversal possible.
 """
+import re
 from fastapi import APIRouter, HTTPException, Query
 from pydantic import BaseModel
 from persona import persona_path, set_context, validate as validate_persona
@@ -47,10 +48,12 @@ async def list_files(
    files = []
    for name in sorted(ALLOWED):
        p = persona_dir / name
+        st = p.stat() if p.exists() else None
        files.append({
            "name": name,
            "exists": p.exists(),
-            "size": p.stat().st_size if p.exists() else 0,
+            "size": st.st_size if st else 0,
+            "modified": st.st_mtime if st else None,
        })
    return {"files": files}

@@ -83,3 +86,59 @@ async def save_file(
    p = _path(filename)
    p.write_text(req.content)
    return {"ok": True, "name": filename, "size": len(req.content)}
+
+
+# ── Session search ────────────────────────────────────────────────────────────
+
+_CONTEXT_CHARS = 120  # chars of context to include around each match
+
+
+@router.get("/sessions/search")
+async def search_sessions(
+    q: str = Query(..., min_length=2),
+    user: str = Query("scott"),
+    persona: str = Query("inara"),
+    limit: int = Query(20, ge=1, le=100),
+) -> dict:
+    """Case-insensitive literal-text search across the persona's session logs.
+
+    Scans `<persona_path>/sessions/*.md` in reverse filename order (newest
+    first when files are named by date) and stops once `limit` matches have
+    been collected.
+
+    Returns {"query", "matches", "total_files_searched"}.  Each match is
+    {"date": <file stem>, "excerpt": <text>} where the excerpt holds up to
+    `_CONTEXT_CHARS` characters either side of the hit, with "…" marking a
+    truncated side.  `total_files_searched` counts only the files that were
+    actually read before the search stopped.
+    """
+    # NOTE(review): _resolve is defined outside this hunk; presumably it
+    # validates user/persona and sets the persona context — confirm.
+    _resolve(user, persona)
+    sessions_dir = persona_path() / "sessions"
+    if not sessions_dir.exists():
+        return {"query": q, "matches": [], "total_files_searched": 0}
+
+    # re.escape: the query is matched as literal text, never as a regex.
+    pattern = re.compile(re.escape(q), re.IGNORECASE)
+    session_files = sorted(sessions_dir.glob("*.md"), reverse=True)  # newest first
+
+    matches = []
+    files_searched = 0
+    for sf in session_files:
+        if len(matches) >= limit:
+            break
+        try:
+            # errors="replace": a log with undecodable bytes stays searchable
+            # instead of raising UnicodeDecodeError and failing the request.
+            text = sf.read_text(errors="replace")
+        except OSError:
+            continue
+        files_searched += 1
+        for m in pattern.finditer(text):
+            if len(matches) >= limit:
+                break
+            start = max(0, m.start() - _CONTEXT_CHARS)
+            end   = min(len(text), m.end() + _CONTEXT_CHARS)
+            excerpt = text[start:end].strip()
+            # Mark each side that was cut short with an ellipsis.
+            if start > 0:
+                excerpt = "…" + excerpt
+            if end < len(text):
+                excerpt = excerpt + "…"
+            matches.append({
+                "date":    sf.stem,          # YYYY-MM-DD
+                "excerpt": excerpt,
+            })
+
+    return {
+        "query":               q,
+        "matches":             matches,
+        "total_files_searched": files_searched,
+    }
--- a/cortex/routers/local_llm.py
+++ b/cortex/routers/local_llm.py
@@ -0,0 +1,242 @@
+"""
+Local LLM settings — per-user host and model configuration.
+
+Routes:
+  GET  /settings/local                      → settings page
+  POST /settings/local/host                 → save/create host
+  POST /settings/local/models/add           → add model entry
+  POST /settings/local/models/{id}/activate → set active model
+  POST /settings/local/models/{id}/remove   → remove model entry
+  GET  /api/local-llm/fetch-models          → proxy to host /api/models (JSON)
+"""
+import logging
+from pathlib import Path
+
+import httpx
+import jwt
+from fastapi import APIRouter, Form, Request
+from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
+
+from auth_utils import COOKIE_NAME, decode_token
+from config import settings as app_settings
+import user_settings as us
+
+logger = logging.getLogger(__name__)
+router = APIRouter()
+
+_STATIC = Path(__file__).parent.parent / "static"
+
+
+# ── Auth helper ───────────────────────────────────────────────────────────────
+
+def _get_user(request: Request) -> str | None:
+    """Return the username decoded from the session cookie, or None.
+
+    None is returned when the `COOKIE_NAME` cookie is absent or when
+    `decode_token` rejects it with `jwt.InvalidTokenError`.
+    """
+    token = request.cookies.get(COOKIE_NAME)
+    if not token:
+        return None
+    try:
+        return decode_token(token)
+    except jwt.InvalidTokenError:
+        return None
+
+
+# ── Page renderer ─────────────────────────────────────────────────────────────
+
+def _render(username: str, success: str = "", error: str = "") -> str:
+    """Render the local-LLM settings page for `username`.
+
+    Loads the user's config with `user_settings.get_config`, builds the model
+    rows and the host <option> list, and substitutes them into the
+    `{{ placeholder }}` markers of static/local_llm.html.  Non-empty
+    `success` / `error` texts replace the `<!-- SUCCESS -->` /
+    `<!-- ERROR -->` markers.
+
+    Every value interpolated into the markup (labels, model names, URLs, ids,
+    key hint, username, status messages) is HTML-escaped, because most of
+    them come straight from user-submitted forms.
+
+    NOTE(review): escaping assumes the template uses these placeholders in
+    HTML text or quoted-attribute positions — confirm none sit inside a
+    <script> block.
+    """
+    # Imported by name because the local variable `html` below holds the page text.
+    from html import escape
+
+    def esc(value) -> str:
+        """HTML-escape `value` for element text or a quoted attribute."""
+        return "" if value is None else escape(str(value), quote=True)
+
+    cfg     = us.get_config(username)
+    hosts   = cfg["hosts"]
+    models  = cfg["models"]
+    active  = cfg.get("active_model_id")
+
+    # Build a host lookup for model rows
+    host_by_id = {h["id"]: h for h in hosts}
+
+    # ── Host section ──────────────────────────────────────────────────────────
+    if hosts:
+        h = hosts[0]   # one host for now
+        host_id_val  = h["id"]
+        host_label   = h.get("label", "")
+        host_url     = h.get("api_url", "")
+        # Only the last four characters of the key are ever shown.
+        host_key_hint = f"…{h['api_key'][-4:]}" if h.get("api_key") else "not set"
+    else:
+        # No per-user host yet: show the server-level defaults.
+        host_id_val  = ""
+        host_label   = ""
+        host_url     = app_settings.local_api_url
+        host_key_hint = f"server default (…{app_settings.local_api_key[-4:]})" \
+                        if app_settings.local_api_key else "not set"
+
+    # ── Model rows ────────────────────────────────────────────────────────────
+    model_rows = ""
+    for m in models:
+        is_active  = m["id"] == active
+        host       = host_by_id.get(m["host_id"], {})
+        host_name  = esc(host.get("label") or host.get("api_url") or "unknown host")
+        model_id   = esc(m["id"])
+        model_name = esc(m["model_name"])
+        label      = esc(m.get("label") or m["model_name"])
+        badge      = '<span class="active-badge">active</span>' if is_active else ""
+        activate_btn = (
+            '<span class="active-label">✓ Active</span>'
+            if is_active else
+            f'''<form method="POST" action="/settings/local/models/{model_id}/activate" style="display:inline">
+                  <button type="submit" class="row-btn">Set active</button>
+                </form>'''
+        )
+        # The confirm() text reads the label from a data attribute, so the
+        # user-supplied label is never spliced into a JavaScript string.
+        model_rows += f'''
+        <div class="model-row{"  model-active" if is_active else ""}">
+          <div class="model-info">
+            <span class="model-label">{label}</span>{badge}
+            <span class="model-name">{model_name}</span>
+            <span class="model-host">{host_name}</span>
+          </div>
+          <div class="model-actions">
+            {activate_btn}
+            <form method="POST" action="/settings/local/models/{model_id}/remove" style="display:inline"
+                  data-label="{label}"
+                  onsubmit="return confirm('Remove ' + this.dataset.label + '?')">
+              <button type="submit" class="row-btn danger">Remove</button>
+            </form>
+          </div>
+        </div>'''
+
+    if not model_rows:
+        model_rows = '<p class="empty-note">No models added yet. Use "Add Model" below.</p>'
+
+    # ── Host select for Add Model ─────────────────────────────────────────────
+    host_options = "".join(
+        f'<option value="{esc(h["id"])}">{esc(h.get("label") or h["api_url"])}</option>'
+        for h in hosts
+    )
+    # The Add Model section stays hidden until a host exists.
+    add_section_hidden = "" if hosts else ' style="display:none"'
+
+    html = (_STATIC / "local_llm.html").read_text()
+    first_host_id = hosts[0]["id"] if hosts else ""
+
+    html = html.replace("{{ username }}",         esc(username))
+    html = html.replace("{{ host_id }}",          esc(host_id_val))
+    html = html.replace("{{ host_label }}",       esc(host_label))
+    html = html.replace("{{ host_url }}",         esc(host_url))
+    html = html.replace("{{ host_key_hint }}",    esc(host_key_hint))
+    # model_rows / host_options are markup built above from escaped values.
+    html = html.replace("{{ model_rows }}",       model_rows)
+    html = html.replace("{{ host_options }}",     host_options)
+    html = html.replace("{{ first_host_id }}",    esc(first_host_id))
+    html = html.replace("{{ add_section_hidden }}", add_section_hidden)
+    html = html.replace("{{ has_host }}",         "true" if hosts else "false")
+    if success:
+        html = html.replace("<!-- SUCCESS -->", f'<p class="msg success">{esc(success)}</p>')
+    if error:
+        html = html.replace("<!-- ERROR -->",   f'<p class="msg error">{esc(error)}</p>')
+    return html
+
+
+# ── Routes ────────────────────────────────────────────────────────────────────
+
+@router.get("/settings/local", include_in_schema=False)
+async def local_llm_page(request: Request):
+    """Serve the local-LLM settings page; redirect to /login when no user can be resolved."""
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+    return HTMLResponse(_render(username))
+
+
+@router.post("/settings/local/host", include_in_schema=False)
+async def save_host(
+    request: Request,
+    host_id:  str = Form(""),
+    label:    str = Form(""),
+    api_url:  str = Form(""),
+    api_key:  str = Form(""),
+):
+    """Create or update the user's host entry, then re-render the settings page.
+
+    Redirects to /login when no user can be resolved.  A blank `api_url`
+    re-renders the page with an error instead of saving.  An empty `host_id`
+    is passed to `user_settings.save_host` as None.
+    """
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    if not api_url.strip():
+        return HTMLResponse(_render(username, error="API URL is required."))
+
+    # NOTE(review): api_url is validated stripped but stored as submitted;
+    # confirm whether user_settings.save_host normalises it.
+    us.save_host(username, host_id or None, label, api_url, api_key)
+    logger.info("local LLM host saved: %s", username)
+    return HTMLResponse(_render(username, success="Host saved."))
+
+
+@router.post("/settings/local/models/add", include_in_schema=False)
+async def add_model(
+    request:    Request,
+    host_id:    str = Form(...),
+    label:      str = Form(""),
+    model_name: str = Form(...),
+):
+    """Add a model entry for the user, then re-render the settings page.
+
+    Redirects to /login when no user can be resolved.  A blank `model_name`
+    re-renders the page with an error instead of saving.  The success message
+    shows `label`, or `model_name` when no label was given.
+    """
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    if not model_name.strip():
+        return HTMLResponse(_render(username, error="Model name is required."))
+
+    us.add_model(username, host_id, label, model_name)
+    logger.info("local model added: %s / %s", username, model_name)
+    return HTMLResponse(_render(username, success=f"Model \"{label or model_name}\" added."))
+
+
+@router.post("/settings/local/models/{model_id}/activate", include_in_schema=False)
+async def activate_model(request: Request, model_id: str):
+    """Make `model_id` the user's active local model, then re-render the settings page.
+
+    Redirects to /login when no user can be resolved.  A falsy result from
+    `user_settings.set_active_model` is shown as "Model not found."
+    """
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    if not us.set_active_model(username, model_id):
+        return HTMLResponse(_render(username, error="Model not found."))
+
+    logger.info("active local model set: %s / %s", username, model_id)
+    return HTMLResponse(_render(username, success="Active model updated."))
+
+
+@router.post("/settings/local/models/{model_id}/remove", include_in_schema=False)
+async def remove_model(request: Request, model_id: str):
+    """Remove the model entry `model_id` for the user, then re-render the settings page.
+
+    Redirects to /login when no user can be resolved.  The return value of
+    `user_settings.remove_model` is not checked, so "Model removed." is shown
+    regardless of it.
+    """
+    username = _get_user(request)
+    if not username:
+        return RedirectResponse("/login", status_code=302)
+
+    us.remove_model(username, model_id)
+    logger.info("local model removed: %s / %s", username, model_id)
+    return HTMLResponse(_render(username, success="Model removed."))
+
+
+@router.get("/api/local-llm/fetch-models")
+async def fetch_models(request: Request) -> JSONResponse:
+    """Proxy to the configured host's /api/models endpoint.
+
+    Uses the user's first configured host, or the server-level
+    `local_api_url` / `local_api_key` settings when the user has none.
+
+    Returns {"models": [{id, name}, ...]} sorted case-insensitively by name.
+    Failures return {"error": ...}: 401 when no user can be resolved, 400
+    when no host URL is available, 502 when the upstream request fails or
+    its payload cannot be processed.
+    """
+    username = _get_user(request)
+    if not username:
+        return JSONResponse({"error": "Not authenticated"}, status_code=401)
+
+    cfg   = us.get_config(username)
+    hosts = cfg.get("hosts", [])
+
+    # Fall back to .env if no host configured yet
+    if hosts:
+        h       = hosts[0]
+        api_url = h.get("api_url", "")
+        api_key = h.get("api_key", "")
+    else:
+        api_url = app_settings.local_api_url
+        api_key = app_settings.local_api_key
+
+    if not api_url:
+        return JSONResponse({"error": "No host configured."}, status_code=400)
+
+    url     = api_url.rstrip("/") + "/api/models"
+    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+
+    try:
+        async with httpx.AsyncClient(timeout=8) as client:
+            resp = await client.get(url, headers=headers)
+        resp.raise_for_status()
+        data   = resp.json()
+        # Entries are read from the payload's "data" list; an entry without
+        # "name" is listed under its id.
+        models = [
+            {"id": m["id"], "name": m.get("name") or m["id"]}
+            for m in data.get("data", [])
+        ]
+        models.sort(key=lambda m: m["name"].lower())
+        return JSONResponse({"models": models})
+    except httpx.HTTPStatusError as e:
+        return JSONResponse({"error": f"Host returned {e.response.status_code}"}, status_code=502)
+    except Exception as e:
+        # Connection errors, invalid JSON and unexpected payload shapes all land here.
+        return JSONResponse({"error": str(e)}, status_code=502)
--- a/cortex/routers/nextcloud_talk.py
+++ b/cortex/routers/nextcloud_talk.py
@@ -1,16 +1,13 @@
 import asyncio
-import hashlib
-import hmac
 import json
 import logging
-import secrets

-import httpx
 from fastapi import APIRouter, BackgroundTasks, HTTPException, Request, Response

 from auth_utils import get_user_channels
 from context_loader import load_context
 from llm_client import complete
+from notification import _send_nct_message
 from persona import set_context
 from session_logger import log_turn
 from session_store import load as load_session, save as save_session
@@ -40,38 +37,8 @@ def _verify_signature(body: bytes, random_header: str, sig_header: str, secret:

 async def _send_reply(conversation_token: str, message: str, nextcloud_url: str, secret: str) -> None:
    """Post a message to Nextcloud Talk as the bot."""
-    url = (
-        f"{nextcloud_url}/ocs/v2.php/apps/spreed/api/v1"
-        f"/bot/{conversation_token}/message"
-    )
-    # NC Talk verifies HMAC over (random + message_text), NOT the raw body.
-    # See BotController::getBotFromHeaders → checksumVerificationService::validateRequest($random, $sig, $secret, $message)
-    body_dict  = {"message": message}
-    body_bytes = json.dumps(body_dict, ensure_ascii=False).encode("utf-8")
-    random_str = secrets.token_hex(32)
-    sig = hmac.new(
-        secret.encode(),
-        (random_str + message).encode("utf-8"),
-        hashlib.sha256,
-    ).hexdigest()
-
-    logger.info("NCT _send_reply → %s (body: %s)", url, body_bytes.decode())
-    try:
-        async with httpx.AsyncClient() as client:
-            resp = await client.post(
-                url,
-                content=body_bytes,
-                headers={
-                    "Content-Type": "application/json",
-                    "OCS-APIRequest": "true",
-                    "X-Nextcloud-Talk-Bot-Random": random_str,
-                    "X-Nextcloud-Talk-Bot-Signature": sig,
-                },
-                timeout=15,
-            )
-        logger.info("NCT reply: %s — %s", resp.status_code, resp.text[:400])
-    except Exception as e:
-        logger.error("NCT reply error: %s", e)
+    # Log only the room token and message length, then hand delivery to the
+    # helper imported from notification.py.
+    logger.info("NCT _send_reply → room %s (%d chars)", conversation_token, len(message))
+    await _send_nct_message(nextcloud_url, secret, conversation_token, message)


 async def _process_message(
--- a/cortex/routers/settings.py
+++ b/cortex/routers/settings.py
@@ -55,6 +55,7 @@ def _settings_page(username: str, personas: list[str], success: str = "", error:
        hint = "Using server key"
    html = html.replace("{{ gemini_key_hint }}", hint)
    html = html.replace("{{ gemini_key_set }}", "true" if gemini_key else "false")
+
    persona_items = "\n".join(
        f'''<li>
          <a href="/{username}/{p}" class="persona-link">{p}</a>