feat: local LLM multi-model, session search, cron proactive types, notifications, docs overhaul

Local LLM:
- user_settings.py: per-user hosts/models config (local_llm.json)
- routers/local_llm.py + static/local_llm.html: dedicated settings page
- llm_client.py: local OpenAI-compatible backend via httpx
- config.py: LOCAL_API_URL/KEY/MODEL + per-backend timeouts
- Active model shown near backend toggle (amber hint text)

Memory distillation:
- memory_distiller.py: DISTILL_BACKEND_MID/LONG .env overrides
- scheduler.py + notification.py: notify NC Talk after mid/long distill
- notification.py: outbound channel abstraction (NC Talk, extensible)

Session search:
- routers/files.py: GET /sessions/search?q= with excerpts grouped by date
- static/index.html + app.js: search UI in file sidebar with highlight
- _esc() helper to prevent XSS in search results

Proactive cron:
- cron_runner.py: new job types — message (send directly) and brief (LLM + send)
- Both support optional per-job channel override

Channels:
- routers/nextcloud_talk.py: consolidated using notification._send_nct_message()
- routers/auth.py: local backend status in /auth/status
- routers/chat.py: /backend returns {primary, fallback, local_model} object

UI / UX:
- Copy button for user messages (matching assistant)
- Autocomplete disabled on sensitive form fields
- settings.html: local model section replaced with link to /settings/local

Docs overhaul:
- MASTER.md hub + ARCH__SYSTEM/BACKENDS/PERSONA/CHANNELS/FUTURE.md
- ARCH__Intelligence_Layer.md replaced with redirect table
- CORTEX.md trimmed to vision only; README updated
- OPEN_WEBUI_API.md added to docs/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 20:53:06 -04:00
parent bd6532e93a
commit a4daebdc9b
33 changed files with 2985 additions and 486 deletions
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -1,6 +1,7 @@
 import asyncio
 import json
-from fastapi import APIRouter, HTTPException, Query
+import jwt
+from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 from context_loader import load_context
@@ -9,6 +10,8 @@ from session_logger import log_turn
 from session_store import load as load_session, save as save_session, list_all, generate_session_id, delete as delete_session, rename as rename_session
 from config import settings
 from persona import set_context, validate as validate_persona
+from auth_utils import COOKIE_NAME, decode_token
+import user_settings
 import event_bus


@@ -29,7 +32,7 @@ class ChatRequest(BaseModel):


 class BackendRequest(BaseModel):
-    primary: str  # "claude" or "gemini"
+    primary: str  # "claude", "gemini", or "local"


 class NoteRequest(BaseModel):
@@ -130,19 +133,45 @@ async def chat(req: ChatRequest) -> StreamingResponse:
    )


+_BACKEND_CYCLE = ("claude", "gemini", "local")  # values accepted as the primary backend by POST /backend
+_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}  # primary -> fallback reported to the client
+
+
+def _local_model_info(request: Request) -> dict | None:
+    """Return active local model {label, model_name} for the session user, or None."""
+    try:
+        token    = request.cookies.get(COOKIE_NAME)
+        username = decode_token(token) if token else None  # no cookie -> skip decoding entirely
+        if not username:
+            return None
+        cfg = user_settings.get_active_local_model(username)
+        if cfg:
+            return {"label": cfg["label"], "model_name": cfg["model_name"]}
+    except (jwt.InvalidTokenError, Exception):  # NOTE(review): Exception alone should catch the same set (jwt.InvalidTokenError presumably derives from it) — confirm
+        pass  # best-effort: any failure, including a KeyError on cfg, falls through to the None below
+    return None
+
+
@router.get("/backend")
-async def get_backend() -> dict:
-    other = "gemini" if settings.primary_backend == "claude" else "claude"
-    return {"primary": settings.primary_backend, "fallback": other}
+async def get_backend(request: Request) -> dict:  # returns {primary, fallback, local_model}
+    p = settings.primary_backend
+    return {
+        "primary":      p,
+        "fallback":     _BACKEND_FALLBACK.get(p, "claude"),  # a primary missing from the map falls back to "claude"
+        "local_model":  _local_model_info(request),  # None when no active local model is resolved for the caller
+    }


@router.post("/backend")
-async def set_backend(req: BackendRequest) -> dict:
-    if req.primary not in ("claude", "gemini"):
-        raise HTTPException(status_code=400, detail="primary must be 'claude' or 'gemini'")
+async def set_backend(req: BackendRequest, request: Request) -> dict:  # stores the new primary on settings, then returns the same shape as GET /backend
+    if req.primary not in _BACKEND_CYCLE:  # reject anything outside the supported backends with a 400
+        raise HTTPException(status_code=400, detail="primary must be 'claude', 'gemini', or 'local'")
    settings.primary_backend = req.primary
-    other = "gemini" if req.primary == "claude" else "claude"
-    return {"primary": settings.primary_backend, "fallback": other}
+    return {
+        "primary":     req.primary,
+        "fallback":    _BACKEND_FALLBACK[req.primary],  # direct indexing is safe: req.primary was validated against _BACKEND_CYCLE, whose members are all keys here
+        "local_model": _local_model_info(request),  # None when no active local model is resolved for the caller
+    }


 def _set_ctx(user: str, persona: str) -> None: