feat: audit log, usage tracking UI, OpenAI orchestrator compaction, onboarding + docs
Tool audit log:
- Every orchestrator tool call logged to home/{user}/tool_audit/YYYY-MM-DD.jsonl
- Files panel sidebar: audit log group (collapsed), date-linked read-only table
- Admin endpoints: /api/audit/files, /api/audit/day, /api/audit/recent, /api/audit/stats
- Engine and model name recorded per entry
OpenAI orchestrator improvements:
- Context budget enforcement: 75% of model context_k (min 16k)
- Message compaction: truncates old tool results when approaching budget
- max_rounds respected per model config (intersected with server cap)
OpenRouter onboarding (setup.html, onboarding.py, app.js, settings.html):
- Step 3 of 3: /setup/model with curated model picker
- Chat banner for users on server-default model (informational, not alarmist)
- Settings quick-link card; /setup/model works standalone for existing users
Model registry + session store:
- set_role_config / get_role_config for per-role tool lists and system_append
- session_store: session rename, session name backfill endpoint
UI updates (app.js, index.html, style.css, local_llm.html):
- Role toggle in context panel
- Off-the-record mode
- Agent notes read-only viewer
- OPERATIONS.md loaded at T2+ in context
Documentation:
- HELP.md: full tool table, per-role tool sets, Agent Notes, usage tracking
- TOOLS.md: Agent Notes section, count corrected to 44
- ARCH__SYSTEM.md, ARCH__BACKENDS.md, MASTER.md updated to match reality
- CLAUDE.md: onboarding flow, documentation philosophy sections
- README.md: stack in practice, DeepSeek TUI mention, architecture diagram updated
- TODO__Agents.md: onboarding task completed with deviation notes
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -295,6 +295,53 @@ async def rename_session_endpoint(
|
||||
return {"ok": True, "session_id": session_id, "name": req.name.strip()}
|
||||
|
||||
|
||||
@router.post("/api/sessions/backfill-names")
async def backfill_session_names(
    request: Request,
    user: str = Query(""),
    persona: str = Query(""),
) -> dict:
    """Name every unnamed session using its first user message (truncated to 60 chars).

    Idempotent — only touches sessions that have no name set.
    user/persona default to the JWT session user + last-used persona cookie.

    Returns:
        ``{"ok": True, "named": <sessions renamed>, "total": <sessions seen>}``.

    Raises:
        HTTPException: 401 when no user is given and the session cookie is
            missing or invalid; 400 when no persona can be resolved.
    """
    # NOTE(review): when `user` is passed explicitly, no JWT check happens in
    # this function — confirm that upstream middleware authenticates callers.
    if not user:
        token = request.cookies.get(COOKIE_NAME)
        if not token:
            raise HTTPException(status_code=401, detail="Not authenticated")
        try:
            user = decode_token(token)
        except jwt.InvalidTokenError:
            raise HTTPException(status_code=401, detail="Invalid session")

    # Resolve persona from cookie if not provided
    if not persona:
        from persona import list_user_personas

        persona_cookie = request.cookies.get("cx_last_persona", "")
        available = list_user_personas(user)
        # Prefer the last-used persona, else fall back to the first available.
        persona = persona_cookie if persona_cookie in available else (available[0] if available else "")
    if not persona:
        raise HTTPException(status_code=400, detail="No persona found for user")

    _set_ctx(user, persona)
    sessions = list_all()
    named = 0
    for s in sessions:
        if s.get("name"):
            continue
        messages = load_session(s["session_id"])
        first_user = next((m for m in messages if m.get("role") == "user"), None)
        if not first_user:
            continue
        content = first_user.get("content")
        # Only plain-text content can become a name; None or any non-string
        # payload has no usable text (and no .strip()), so skip it.
        if not isinstance(content, str):
            continue
        text = content.strip()
        if not text:
            continue
        auto_name = text[:60].rstrip() + ("…" if len(text) > 60 else "")
        rename_session(s["session_id"], auto_name)
        named += 1
    return {"ok": True, "named": named, "total": len(sessions)}
|
||||
|
||||
|
||||
@router.delete("/sessions/{session_id}")
|
||||
async def delete_session_endpoint(
|
||||
session_id: str,
|
||||
|
||||
@@ -1,25 +1,50 @@
|
||||
"""
|
||||
Manual memory distillation endpoints.
|
||||
|
||||
POST /distill/short — roll session logs → MEMORY_SHORT.md (no LLM)
|
||||
POST /distill/mid — summarize short → MEMORY_MID.md (LLM)
|
||||
POST /distill/long — integrate mid → MEMORY_LONG.md (LLM)
|
||||
POST /distill/all — run all three in sequence
|
||||
POST /distill/short — roll session logs → MEMORY_SHORT.md (no LLM)
|
||||
POST /distill/mid — summarize short → MEMORY_MID.md (LLM)
|
||||
POST /distill/long — integrate mid → MEMORY_LONG.md (LLM)
|
||||
POST /distill/all — run all three in sequence
|
||||
POST /distill/rebuild — wipe mid + long, then run all three from scratch
|
||||
|
||||
All endpoints require ?user=<username>&persona=<name> query params so distillation
|
||||
targets the correct persona. Without them, the request is rejected (no silent fallback
|
||||
to server defaults — that caused wrong-user distillation in a multi-user setup).
|
||||
All endpoints require ?user=<username>&persona=<name> query params.
|
||||
|
||||
Concurrency: one distillation at a time per persona. A second request while one
|
||||
is running returns 409 immediately — no silent queuing.
|
||||
"""
|
||||
import asyncio
|
||||
from datetime import datetime, timedelta
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from memory_distiller import distill_short, distill_mid, distill_long
|
||||
from persona import validate as validate_persona, set_context
|
||||
from persona import validate as validate_persona, set_context, persona_path as _persona_path
|
||||
import scheduler
|
||||
|
||||
router = APIRouter(prefix="/distill")
|
||||
|
||||
# Per-persona asyncio lock. Key: (user, persona)
_LOCKS: dict[tuple[str, str], asyncio.Lock] = {}
_LOCKS_META: dict[tuple[str, str], str] = {}  # key → which step is currently running

# Minimum time between successive runs of each endpoint, per persona.
# Prevents accidental rapid-fire runs and token waste.
# Keyed by endpoint name (a plain string, not a tuple).
_COOLDOWNS: dict[str, timedelta] = {
    "short": timedelta(minutes=1),
    "mid": timedelta(minutes=30),
    "long": timedelta(hours=6),
    "all": timedelta(hours=1),
    "rebuild": timedelta(hours=6),
}
_LAST_RUN: dict[tuple[str, str, str], datetime] = {}  # key: (user, persona, endpoint)
|
||||
|
||||
|
||||
def _get_lock(user: str, persona: str) -> asyncio.Lock:
    """Return the lock registered for (user, persona), creating it on first use."""
    pair = (user, persona)
    found = _LOCKS.get(pair)
    if found is None:
        # First request for this persona: register a fresh lock.
        found = _LOCKS[pair] = asyncio.Lock()
    return found
|
||||
|
||||
|
||||
def _resolve(user: str, persona: str) -> tuple[str, str]:
|
||||
"""Validate and set persona context. Raises 404 if the persona doesn't exist."""
|
||||
try:
|
||||
u, p = validate_persona(user, persona)
|
||||
except Exception:
|
||||
@@ -28,13 +53,51 @@ def _resolve(user: str, persona: str) -> tuple[str, str]:
|
||||
return u, p
|
||||
|
||||
|
||||
def _check_lock(user: str, persona: str) -> asyncio.Lock:
    """Hand back the persona's lock when it is free; raise HTTP 409 when it is held."""
    candidate = _get_lock(user, persona)
    if not candidate.locked():
        return candidate
    # Report which step holds the lock when that is known.
    running = _LOCKS_META.get((user, persona), "distillation")
    message = f"A {running} is already running for {persona} — please wait for it to finish."
    raise HTTPException(status_code=409, detail=message)
|
||||
|
||||
|
||||
def _check_cooldown(user: str, persona: str, endpoint: str) -> None:
    """Reject with HTTP 429 when `endpoint` ran too recently for this persona.

    Does nothing when the endpoint has no configured cooldown or has no
    recorded previous run.
    """
    window = _COOLDOWNS.get(endpoint)
    if not window:
        return
    previous = _LAST_RUN.get((user, persona, endpoint))
    if not previous:
        return
    since = datetime.now() - previous
    if since >= window:
        return
    # Still cooling down: tell the caller how long is left.
    whole_mins, leftover = divmod((window - since).total_seconds(), 60)
    mins = int(whole_mins)
    secs = int(leftover)
    if mins:
        wait = f"{mins}m {secs}s"
    else:
        wait = f"{secs}s"
    raise HTTPException(
        status_code=429,
        detail=f"{endpoint} was just run — please wait {wait} before running again.",
    )
|
||||
|
||||
|
||||
def _record_run(user: str, persona: str, endpoint: str) -> None:
    """Record the current time as the latest run of `endpoint` for (user, persona)."""
    stamp = datetime.now()
    _LAST_RUN[(user, persona, endpoint)] = stamp
|
||||
|
||||
|
||||
@router.get("/status")
|
||||
async def distill_status() -> dict:
|
||||
"""Show auto-distillation schedule and next run times."""
|
||||
from config import settings
|
||||
# Include which personas are currently distilling
|
||||
active = [f"{u}/{p}" for (u, p), lock in _LOCKS.items() if lock.locked()]
|
||||
return {
|
||||
"enabled": settings.auto_distill,
|
||||
"jobs": scheduler.status(),
|
||||
"active": active,
|
||||
"config": {
|
||||
"short": settings.auto_distill_short,
|
||||
"mid": settings.auto_distill_mid,
|
||||
@@ -49,7 +112,16 @@ async def do_distill_short(
|
||||
persona: str = Query(...),
|
||||
) -> dict:
|
||||
u, p = _resolve(user, persona)
|
||||
return {"ok": True, **distill_short(u, p)}
|
||||
_check_cooldown(u, p, "short")
|
||||
lock = _check_lock(u, p)
|
||||
async with lock:
|
||||
_LOCKS_META[(u, p)] = "short distill"
|
||||
try:
|
||||
result = distill_short(u, p)
|
||||
_record_run(u, p, "short")
|
||||
return {"ok": True, **result}
|
||||
finally:
|
||||
_LOCKS_META.pop((u, p), None)
|
||||
|
||||
|
||||
@router.post("/mid")
|
||||
@@ -58,8 +130,17 @@ async def do_distill_mid(
|
||||
persona: str = Query(...),
|
||||
) -> dict:
|
||||
u, p = _resolve(user, persona)
|
||||
result = await distill_mid(u, p)
|
||||
return {"ok": "error" not in result, **result}
|
||||
_check_cooldown(u, p, "mid")
|
||||
lock = _check_lock(u, p)
|
||||
async with lock:
|
||||
_LOCKS_META[(u, p)] = "mid distill"
|
||||
try:
|
||||
result = await distill_mid(u, p)
|
||||
if "error" not in result:
|
||||
_record_run(u, p, "mid")
|
||||
return {"ok": "error" not in result, **result}
|
||||
finally:
|
||||
_LOCKS_META.pop((u, p), None)
|
||||
|
||||
|
||||
@router.post("/long")
|
||||
@@ -68,8 +149,17 @@ async def do_distill_long(
|
||||
persona: str = Query(...),
|
||||
) -> dict:
|
||||
u, p = _resolve(user, persona)
|
||||
result = await distill_long(u, p)
|
||||
return {"ok": "error" not in result, **result}
|
||||
_check_cooldown(u, p, "long")
|
||||
lock = _check_lock(u, p)
|
||||
async with lock:
|
||||
_LOCKS_META[(u, p)] = "long distill"
|
||||
try:
|
||||
result = await distill_long(u, p)
|
||||
if "error" not in result:
|
||||
_record_run(u, p, "long")
|
||||
return {"ok": "error" not in result, **result}
|
||||
finally:
|
||||
_LOCKS_META.pop((u, p), None)
|
||||
|
||||
|
||||
@router.post("/all")
|
||||
@@ -78,14 +168,71 @@ async def do_distill_all(
|
||||
persona: str = Query(...),
|
||||
) -> dict:
|
||||
u, p = _resolve(user, persona)
|
||||
short_result = distill_short(u, p)
|
||||
mid_result = await distill_mid(u, p)
|
||||
if "error" in mid_result:
|
||||
return {"ok": False, "short": short_result, "mid": mid_result}
|
||||
long_result = await distill_long(u, p)
|
||||
return {
|
||||
"ok": "error" not in long_result,
|
||||
"short": short_result,
|
||||
"mid": mid_result,
|
||||
"long": long_result,
|
||||
}
|
||||
_check_cooldown(u, p, "all")
|
||||
lock = _check_lock(u, p)
|
||||
async with lock:
|
||||
_LOCKS_META[(u, p)] = "full distill"
|
||||
try:
|
||||
short_result = distill_short(u, p)
|
||||
mid_result = await distill_mid(u, p)
|
||||
if "error" in mid_result:
|
||||
return {"ok": False, "short": short_result, "mid": mid_result}
|
||||
long_result = await distill_long(u, p)
|
||||
ok = "error" not in long_result
|
||||
if ok:
|
||||
_record_run(u, p, "all")
|
||||
return {
|
||||
"ok": ok,
|
||||
"short": short_result,
|
||||
"mid": mid_result,
|
||||
"long": long_result,
|
||||
}
|
||||
finally:
|
||||
_LOCKS_META.pop((u, p), None)
|
||||
|
||||
|
||||
@router.post("/rebuild")
async def do_distill_rebuild(
    user: str = Query(...),
    persona: str = Query(...),
) -> dict:
    """Wipe MEMORY_MID and MEMORY_LONG (with backups), then run short → mid → long.

    Use when memories have drifted, been corrupted, or you want a clean slate
    rebuilt purely from session logs. Hand-edited content will be replaced.

    Returns:
        A dict with ``ok``, the per-step results (``short``, ``mid`` and, when
        the mid step succeeded, ``long``) and ``rebuilt: True``.

    Raises:
        HTTPException: propagated from ``_resolve`` / ``_check_cooldown`` /
            ``_check_lock`` (unknown persona, cooldown active, lock held).
    """
    u, p = _resolve(user, persona)
    _check_cooldown(u, p, "rebuild")
    lock = _check_lock(u, p)
    async with lock:
        _LOCKS_META[(u, p)] = "memory rebuild"
        try:
            from memory_distiller import _rotate_backup

            inara_dir = _persona_path(u, p)
            stamp = datetime.now().strftime("%Y-%m-%d %H:%M")

            # Back up then wipe mid and long before rebuilding
            for name in ("MEMORY_MID.md", "MEMORY_LONG.md"):
                path = inara_dir / name
                if path.exists():
                    _rotate_backup(path)
                path.write_text(f"# {name}\n\n*Cleared for rebuild — {stamp}.*\n")

            short_result = distill_short(u, p)
            mid_result = await distill_mid(u, p)
            if "error" in mid_result:
                # Stop before the long step; the cooldown is not recorded on failure.
                return {"ok": False, "short": short_result, "mid": mid_result, "rebuilt": True}
            long_result = await distill_long(u, p)
            ok = "error" not in long_result
            if ok:
                _record_run(u, p, "rebuild")
            return {
                "ok": ok,
                "short": short_result,
                "mid": mid_result,
                "long": long_result,
                "rebuilt": True,
            }
        finally:
            # Always clear the "currently running" marker, even on error.
            _LOCKS_META.pop((u, p), None)
|
||||
|
||||
@@ -27,10 +27,21 @@ ALLOWED = {
|
||||
"MEMORY_SHORT.bak1.md",
|
||||
"MEMORY_SHORT.bak2.md",
|
||||
"HELP.md",
|
||||
# Agent private notes — backups only; AGENT_NOTES.md itself is agent-only
|
||||
"AGENT_NOTES.bak1.md",
|
||||
"AGENT_NOTES.bak2.md",
|
||||
"AGENT_NOTES.bak3.md",
|
||||
}
|
||||
|
||||
# Files that can be read via the panel but not written by users
|
||||
READ_ONLY = {
|
||||
"AGENT_NOTES.bak1.md",
|
||||
"AGENT_NOTES.bak2.md",
|
||||
"AGENT_NOTES.bak3.md",
|
||||
}
|
||||
|
||||
# Files served from home/{user}/ instead of persona path
|
||||
USER_FILES = {"email_allowlist.json"}
|
||||
USER_FILES = {"email_allowlist.json", "usage.json"}
|
||||
|
||||
|
||||
def _resolve(user: str, persona: str) -> None:
|
||||
@@ -92,7 +103,11 @@ async def get_file(
|
||||
p = _path(filename, user=user)
|
||||
if not p.exists():
|
||||
raise HTTPException(status_code=404, detail=f"{filename} does not exist")
|
||||
return {"name": filename, "content": p.read_text()}
|
||||
return {
|
||||
"name": filename,
|
||||
"content": p.read_text(),
|
||||
"readonly": filename in READ_ONLY,
|
||||
}
|
||||
|
||||
|
||||
class FileWrite(BaseModel):
|
||||
@@ -106,6 +121,8 @@ async def save_file(
|
||||
user: str = Query("scott"),
|
||||
persona: str = Query("inara"),
|
||||
) -> dict:
|
||||
if filename in READ_ONLY:
|
||||
raise HTTPException(status_code=403, detail=f"{filename} is read-only.")
|
||||
_resolve(user, persona)
|
||||
p = _path(filename, user=user)
|
||||
p.write_text(req.content)
|
||||
|
||||
@@ -159,7 +159,8 @@ def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
else:
|
||||
secondary = default_secondary
|
||||
|
||||
ctx = f'<span class="ctx-badge">{m.get("context_k",0)}k</span>' if m.get("context_k") else ""
|
||||
ctx = f'<span class="ctx-badge">{m.get("context_k",0)}k</span>' if m.get("context_k") else ""
|
||||
no_tools = '' if m.get("tools", True) else '<span class="pbadge pb-notools">no tools</span>'
|
||||
tags_html = " ".join(f'<span class="tag">{t}</span>' for t in (m.get("tags") or []))
|
||||
sec = f'<span class="model-host">{secondary}</span>' if secondary else ""
|
||||
|
||||
@@ -201,13 +202,15 @@ def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
cur_label = m.get("label", "")
|
||||
cur_model_name = m.get("model_name", "")
|
||||
cur_ctx = m.get("context_k", 0) or 0
|
||||
cur_max_rounds = m.get("max_rounds") or 0
|
||||
cur_tools = m.get("tools", True)
|
||||
cur_tags = ", ".join(m.get("tags") or [])
|
||||
|
||||
model_rows += f'''
|
||||
<div class="model-row" id="model-{m["id"]}">
|
||||
<div class="model-row-header">
|
||||
<div class="model-info">
|
||||
<div>{badge}<span class="model-label">{m.get("label") or m.get("model_name","")}</span>{ctx}</div>
|
||||
<div>{badge}<span class="model-label">{m.get("label") or m.get("model_name","")}</span>{ctx}{no_tools}</div>
|
||||
<span class="model-name">{m.get("model_name","")}</span>
|
||||
{sec}
|
||||
<div class="tag-row">{tags_html}</div>
|
||||
@@ -239,8 +242,22 @@ def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
{extra_fields}
|
||||
<div class="field-row">
|
||||
<div class="field" style="flex:0 0 auto">
|
||||
<label>Context (k)</label>
|
||||
<input type="number" name="context_k" value="{cur_ctx}" min="0">
|
||||
<label title="Context window size in thousands of tokens. 0 = assume 32k.">Context (k)</label>
|
||||
<input type="number" name="context_k" value="{cur_ctx}" min="0"
|
||||
title="Context window size in thousands of tokens. 0 = assume 32k (compaction budget ~24k tokens).">
|
||||
</div>
|
||||
<div class="field" style="flex:0 0 auto">
|
||||
<label title="Per-model tool loop cap. 0 = use the global default (orchestrator_max_rounds).">Max rounds</label>
|
||||
<input type="number" name="max_rounds" value="{cur_max_rounds}" min="0"
|
||||
title="Per-model tool loop cap. 0 = use the global default (orchestrator_max_rounds).">
|
||||
</div>
|
||||
<div class="field" style="flex:0 0 auto">
|
||||
<label title="Whether this model supports tool calling. If not supported, requests skip the tool loop entirely.">Tool calling</label>
|
||||
<select name="tools"
|
||||
title="Whether this model supports tool calling. If not supported, requests skip the tool loop entirely.">
|
||||
<option value="1" {'selected' if cur_tools else ''}>Supported</option>
|
||||
<option value="0" {'' if cur_tools else 'selected'}>Not supported</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Tags</label>
|
||||
@@ -426,6 +443,8 @@ async def add_model(
|
||||
provider: str = Form("local"),
|
||||
label: str = Form(""),
|
||||
context_k: int = Form(0),
|
||||
max_rounds: int = Form(0),
|
||||
tools: int = Form(1),
|
||||
tags: str = Form(""),
|
||||
# local-only fields
|
||||
host_id: str = Form(""),
|
||||
@@ -439,14 +458,17 @@ async def add_model(
|
||||
if not username:
|
||||
return RedirectResponse("/login", status_code=302)
|
||||
|
||||
tag_list = [t.strip() for t in tags.split(",") if t.strip()]
|
||||
tag_list = [t.strip() for t in tags.split(",") if t.strip()]
|
||||
max_rounds_ = max_rounds or None
|
||||
tools_bool = tools != 0
|
||||
|
||||
if provider == "local":
|
||||
if not model_name.strip():
|
||||
return HTMLResponse(_render(username, error="Model name is required."))
|
||||
if not host_id.strip():
|
||||
return HTMLResponse(_render(username, error="Select a host."))
|
||||
reg.save_model(username, None, host_id, label, model_name, context_k, tag_list)
|
||||
reg.save_model(username, None, host_id, label, model_name, context_k, tag_list,
|
||||
max_rounds=max_rounds_, tools=tools_bool)
|
||||
display = label or model_name
|
||||
|
||||
elif provider in ("google", "anthropic"):
|
||||
@@ -459,6 +481,7 @@ async def add_model(
|
||||
account_id=account_id or None,
|
||||
credential_id=credential_id or None,
|
||||
context_k=context_k, tags=tag_list,
|
||||
max_rounds=max_rounds_, tools=tools_bool,
|
||||
)
|
||||
display = label or cloud_model_name
|
||||
else:
|
||||
@@ -476,6 +499,8 @@ async def edit_model(
|
||||
label: str = Form(""),
|
||||
model_name: str = Form(""),
|
||||
context_k: int = Form(0),
|
||||
max_rounds: int = Form(0),
|
||||
tools: int = Form(1),
|
||||
tags: str = Form(""),
|
||||
host_id: str = Form(""),
|
||||
account_id: str = Form(""),
|
||||
@@ -486,17 +511,22 @@ async def edit_model(
|
||||
return RedirectResponse("/login", status_code=302)
|
||||
if not model_name.strip():
|
||||
return HTMLResponse(_render(username, error="Model name is required."))
|
||||
tag_list = [t.strip() for t in tags.split(",") if t.strip()]
|
||||
tag_list = [t.strip() for t in tags.split(",") if t.strip()]
|
||||
max_rounds_ = max_rounds or None
|
||||
tools_bool = tools != 0
|
||||
if mtype == "local_openai":
|
||||
if not host_id.strip():
|
||||
return HTMLResponse(_render(username, error="Select a host for this model."))
|
||||
reg.save_model(username, model_id, host_id, label, model_name, context_k, tag_list)
|
||||
reg.save_model(username, model_id, host_id, label, model_name, context_k, tag_list,
|
||||
max_rounds=max_rounds_, tools=tools_bool)
|
||||
elif mtype == "gemini_api":
|
||||
reg.save_cloud_model(username, model_id, "google", model_name, label,
|
||||
account_id=account_id or None, context_k=context_k, tags=tag_list)
|
||||
account_id=account_id or None, context_k=context_k, tags=tag_list,
|
||||
max_rounds=max_rounds_, tools=tools_bool)
|
||||
elif mtype == "claude_cli":
|
||||
reg.save_cloud_model(username, model_id, "anthropic", model_name, label,
|
||||
credential_id=credential_id or "cli", context_k=context_k, tags=tag_list)
|
||||
credential_id=credential_id or "cli", context_k=context_k, tags=tag_list,
|
||||
max_rounds=max_rounds_, tools=tools_bool)
|
||||
else:
|
||||
return HTMLResponse(_render(username, error=f"Unknown model type: {mtype}"))
|
||||
display = label.strip() or model_name.strip()
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
"""
|
||||
Onboarding router — invite-based setup + persona creation.
|
||||
Onboarding router — invite-based setup + persona creation + model connect.
|
||||
|
||||
Routes:
|
||||
GET /setup/{token} → show password setup form (step 1)
|
||||
POST /setup/{token} → set password, redirect to persona step
|
||||
GET /setup/persona → show persona creation form (step 2, requires auth)
|
||||
POST /setup/persona → create persona, redirect to /{user}/{persona}
|
||||
POST /setup/persona → create persona, redirect to /setup/model
|
||||
GET /setup/model → OpenRouter quick-connect (step 3, also standalone)
|
||||
POST /setup/model → save host + model + assign to chat role, redirect to chat
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -21,6 +23,7 @@ from auth_utils import (
|
||||
)
|
||||
from persona_template import create_persona
|
||||
from persona import list_user_personas, validate as validate_persona
|
||||
import model_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/setup")
|
||||
@@ -114,7 +117,11 @@ async def persona_submit(
|
||||
description=description.strip(),
|
||||
)
|
||||
logger.info("persona created: %s/%s", username, persona_name)
|
||||
return RedirectResponse(f"/{username}/{persona_name}", status_code=302)
|
||||
# Step 3: guided model setup before entering the chat
|
||||
resp = RedirectResponse("/setup/model", status_code=302)
|
||||
# Remember which persona to land on after model setup
|
||||
resp.set_cookie("cx_setup_persona", f"{username}/{persona_name}", max_age=3600, httponly=True, samesite="lax")
|
||||
return resp
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -178,3 +185,126 @@ async def setup_submit(
|
||||
return resp
|
||||
|
||||
return HTMLResponse(_setup_page("Unknown step."), status_code=400)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 3 — model connect (OpenRouter quick-connect, also standalone)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Curated (model id, display label) pairs shown in the Step 3 dropdown.
# The text before " — " in each label is the short model name.
_OPENROUTER_MODELS = [
    (
        "anthropic/claude-3-5-haiku-20241022",
        "Claude 3.5 Haiku — Fast & affordable",
    ),
    (
        "anthropic/claude-3-7-sonnet-20250219",
        "Claude 3.7 Sonnet — Smarter Claude",
    ),
    (
        "google/gemini-2.0-flash-001",
        "Gemini 2.0 Flash — Fast Google model",
    ),
    (
        "meta-llama/llama-3.3-70b-instruct",
        "Llama 3.3 70B — Open source",
    ),
]
|
||||
|
||||
|
||||
def _model_page(error: str = "", from_setup: bool = False) -> str:
    """Render setup.html with only the step-3 (model connect) panel visible.

    Args:
        error: Message to show in the step-3 error slot; HTML-escaped before
            insertion. Empty string leaves the slot untouched.
        from_setup: When true, fills the step label placeholder with
            "Step 3 of 3" (the page is also reachable standalone).

    Returns:
        The page HTML as a string.
    """
    from html import escape  # local import keeps the block self-contained

    page = (_STATIC / "setup.html").read_text()
    # Hide step 1 and reveal step 3. Step 2 is left alone: the previous code's
    # replace for it mapped the markup onto itself, i.e. it was a no-op.
    page = page.replace('<div id="step-password">', '<div id="step-password" style="display:none">')
    page = page.replace('<div id="step-model" style="display:none">', '<div id="step-model">')
    if from_setup:
        page = page.replace("<!-- SETUP_STEP3_LABEL -->", "Step 3 of 3")
    if error:
        page = page.replace("<!-- ERROR_MODEL -->", f'<p class="error">{escape(error)}</p>')
    return page
|
||||
|
||||
|
||||
@router.post("/model/skip", include_in_schema=False)
async def model_skip(request: Request):
    """Skip model setup — redirect to the remembered persona or user root.

    Destination preference: the ``cx_setup_persona`` cookie, then
    ``/{username}`` when the session token decodes, then ``/``. The setup
    cookie is cleared on the response.
    """
    from auth_utils import decode_token
    import jwt

    token = request.cookies.get(COOKIE_NAME)
    username = None
    if token:
        try:
            username = decode_token(token)
        except jwt.InvalidTokenError:
            # An invalid session is not an error here; fall back to "/".
            pass

    # Cookies are client-controlled: strip leading slashes/backslashes so the
    # value cannot turn the redirect into a scheme-relative "//host" URL.
    dest_cookie = request.cookies.get("cx_setup_persona", "").lstrip("/\\")
    if dest_cookie:
        dest = f"/{dest_cookie}"
    elif username:
        dest = f"/{username}"
    else:
        dest = "/"
    resp = RedirectResponse(dest, status_code=302)
    resp.delete_cookie("cx_setup_persona")
    return resp
|
||||
|
||||
|
||||
@router.get("/model", include_in_schema=False)
async def model_page(request: Request):
    """Serve the model-connect page to a signed-in user; otherwise redirect to /login."""
    from auth_utils import decode_token
    import jwt

    session = request.cookies.get(COOKIE_NAME)
    authenticated = False
    if session:
        try:
            decode_token(session)
        except jwt.InvalidTokenError:
            authenticated = False
        else:
            authenticated = True
    if not authenticated:
        return RedirectResponse("/login", status_code=302)

    # The setup cookie is only present when arriving from the onboarding flow.
    in_onboarding = bool(request.cookies.get("cx_setup_persona"))
    return HTMLResponse(_model_page(from_setup=in_onboarding))
|
||||
|
||||
|
||||
@router.post("/model", include_in_schema=False)
async def model_submit(
    request: Request,
    api_key: str = Form(...),
    model_name: str = Form(...),
):
    """Save an OpenRouter host + model, make it the chat primary, then redirect to chat.

    Redirects to /login when the session cookie is missing or invalid.
    Re-renders the form with status 422 when the API key or model name is
    blank after stripping whitespace.
    """
    from auth_utils import decode_token
    import jwt

    token = request.cookies.get(COOKIE_NAME)
    if not token:
        return RedirectResponse("/login", status_code=302)
    try:
        username = decode_token(token)
    except jwt.InvalidTokenError:
        return RedirectResponse("/login", status_code=302)

    api_key = api_key.strip()
    model_name = model_name.strip()
    from_setup = bool(request.cookies.get("cx_setup_persona"))

    # Form(...) only rejects missing fields; whitespace-only values get here.
    if not api_key:
        return HTMLResponse(_model_page("API key is required.", from_setup=from_setup), status_code=422)
    if not model_name:
        return HTMLResponse(_model_page("Model is required.", from_setup=from_setup), status_code=422)

    # Save OpenRouter as a host
    # NOTE(review): host_id=None presumably creates a new host on every
    # submit — confirm whether repeat submissions should reuse an existing one.
    host_id = model_registry.save_host(
        username=username,
        host_id=None,
        label="OpenRouter",
        api_url="https://openrouter.ai/api/v1",
        api_key=api_key,
        host_type="openai",
    )

    # Find label for selected model; unknown ids fall back to the raw model name.
    label = next((lbl for mn, lbl in _OPENROUTER_MODELS if mn == model_name), model_name)
    label = label.split(" — ")[0]  # keep just the model name part

    # Save model entry
    mid = model_registry.save_model(
        username=username,
        model_id=None,
        host_id=host_id,
        label=label,
        model_name=model_name,
        context_k=128,
        tools=True,
    )

    # Assign as chat role primary
    model_registry.set_role(username, "chat", "primary", mid)
    logger.info("openrouter setup complete: %s → %s", username, model_name)

    # Redirect to chat (use remembered persona, or user root). Cookies are
    # client-controlled: strip leading slashes/backslashes so the value cannot
    # turn the redirect into a scheme-relative "//host" URL.
    dest_cookie = request.cookies.get("cx_setup_persona", "").lstrip("/\\")
    dest = f"/{dest_cookie}" if dest_cookie else f"/{username}"

    resp = RedirectResponse(dest, status_code=302)
    resp.delete_cookie("cx_setup_persona")
    return resp
|
||||
|
||||
Reference in New Issue
Block a user