Tool audit log:
- Every orchestrator tool call logged to home/{user}/tool_audit/YYYY-MM-DD.jsonl
- Files panel sidebar: audit log group (collapsed), date-linked read-only table
- Admin endpoints: /api/audit/files, /api/audit/day, /api/audit/recent, /api/audit/stats
- Engine and model name recorded per entry
OpenAI orchestrator improvements:
- Context budget enforcement: 75% of model context_k (min 16k)
- Message compaction: truncates old tool results when approaching budget
- max_rounds respected per model config (intersected with server cap)
OpenRouter onboarding (setup.html, onboarding.py, app.js, settings.html):
- Step 3 of 3: /setup/model with curated model picker
- Chat banner for users on server-default model (informational, not alarmist)
- Settings quick-link card; /setup/model works standalone for existing users
Model registry + session store:
- set_role_config / get_role_config for per-role tool lists and system_append
- session_store: session rename, session name backfill endpoint
UI updates (app.js, index.html, style.css, local_llm.html):
- Role toggle in context panel
- Off-the-record mode
- Agent notes read-only viewer
- OPERATIONS.md loaded at T2+ in context
Documentation:
- HELP.md: full tool table, per-role tool sets, Agent Notes, usage tracking
- TOOLS.md: Agent Notes section, count corrected to 44
- ARCH__SYSTEM.md, ARCH__BACKENDS.md, MASTER.md updated to match reality
- CLAUDE.md: onboarding flow, documentation philosophy sections
- README.md: stack in practice, DeepSeek TUI mention, architecture diagram updated
- TODO__Agents.md: onboarding task completed with deviation notes
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
239 lines
7.8 KiB
Python
239 lines
7.8 KiB
Python
"""
|
|
Manual memory distillation endpoints.
|
|
|
|
POST /distill/short — roll session logs → MEMORY_SHORT.md (no LLM)
|
|
POST /distill/mid — summarize short → MEMORY_MID.md (LLM)
|
|
POST /distill/long — integrate mid → MEMORY_LONG.md (LLM)
|
|
POST /distill/all — run all three in sequence
|
|
POST /distill/rebuild — wipe mid + long, then run all three from scratch
|
|
|
|
All endpoints require ?user=<username>&persona=<name> query params.
|
|
|
|
Concurrency: one distillation at a time per persona. A second request while one
|
|
is running returns 409 immediately — no silent queuing.
|
|
"""
|
|
import asyncio
|
|
from datetime import datetime, timedelta
|
|
from fastapi import APIRouter, HTTPException, Query
|
|
from memory_distiller import distill_short, distill_mid, distill_long
|
|
from persona import validate as validate_persona, set_context, persona_path as _persona_path
|
|
import scheduler
|
|
|
|
router = APIRouter(prefix="/distill")
|
|
|
|
# Per-persona asyncio lock. Key: (user, persona)
|
|
_LOCKS: dict[tuple, asyncio.Lock] = {}
|
|
_LOCKS_META: dict[tuple, str] = {} # key → which step is currently running
|
|
|
|
# Minimum time between successive runs of each endpoint, per persona.
|
|
# Prevents accidental rapid-fire runs and token waste.
|
|
_COOLDOWNS: dict[tuple, timedelta] = {
|
|
"short": timedelta(minutes=1),
|
|
"mid": timedelta(minutes=30),
|
|
"long": timedelta(hours=6),
|
|
"all": timedelta(hours=1),
|
|
"rebuild": timedelta(hours=6),
|
|
}
|
|
_LAST_RUN: dict[tuple, datetime] = {} # key: (user, persona, endpoint)
|
|
|
|
|
|
def _get_lock(user: str, persona: str) -> asyncio.Lock:
|
|
key = (user, persona)
|
|
if key not in _LOCKS:
|
|
_LOCKS[key] = asyncio.Lock()
|
|
return _LOCKS[key]
|
|
|
|
|
|
def _resolve(user: str, persona: str) -> tuple[str, str]:
|
|
try:
|
|
u, p = validate_persona(user, persona)
|
|
except Exception:
|
|
raise HTTPException(status_code=404, detail=f"Persona not found: {user}/{persona}")
|
|
set_context(u, p)
|
|
return u, p
|
|
|
|
|
|
def _check_lock(user: str, persona: str) -> asyncio.Lock:
|
|
"""Return the lock if free, raise 409 if already held."""
|
|
lock = _get_lock(user, persona)
|
|
if lock.locked():
|
|
step = _LOCKS_META.get((user, persona), "distillation")
|
|
raise HTTPException(
|
|
status_code=409,
|
|
detail=f"A {step} is already running for {persona} — please wait for it to finish.",
|
|
)
|
|
return lock
|
|
|
|
|
|
def _check_cooldown(user: str, persona: str, endpoint: str) -> None:
|
|
"""Raise 429 if the endpoint was run too recently for this persona."""
|
|
cooldown = _COOLDOWNS.get(endpoint)
|
|
if not cooldown:
|
|
return
|
|
key = (user, persona, endpoint)
|
|
last = _LAST_RUN.get(key)
|
|
if last:
|
|
elapsed = datetime.now() - last
|
|
if elapsed < cooldown:
|
|
remaining = cooldown - elapsed
|
|
mins = int(remaining.total_seconds() // 60)
|
|
secs = int(remaining.total_seconds() % 60)
|
|
wait = f"{mins}m {secs}s" if mins else f"{secs}s"
|
|
raise HTTPException(
|
|
status_code=429,
|
|
detail=f"{endpoint} was just run — please wait {wait} before running again.",
|
|
)
|
|
|
|
|
|
def _record_run(user: str, persona: str, endpoint: str) -> None:
|
|
_LAST_RUN[(user, persona, endpoint)] = datetime.now()
|
|
|
|
|
|
@router.get("/status")
|
|
async def distill_status() -> dict:
|
|
from config import settings
|
|
# Include which personas are currently distilling
|
|
active = [f"{u}/{p}" for (u, p), lock in _LOCKS.items() if lock.locked()]
|
|
return {
|
|
"enabled": settings.auto_distill,
|
|
"jobs": scheduler.status(),
|
|
"active": active,
|
|
"config": {
|
|
"short": settings.auto_distill_short,
|
|
"mid": settings.auto_distill_mid,
|
|
"long": settings.auto_distill_long,
|
|
},
|
|
}
|
|
|
|
|
|
@router.post("/short")
|
|
async def do_distill_short(
|
|
user: str = Query(...),
|
|
persona: str = Query(...),
|
|
) -> dict:
|
|
u, p = _resolve(user, persona)
|
|
_check_cooldown(u, p, "short")
|
|
lock = _check_lock(u, p)
|
|
async with lock:
|
|
_LOCKS_META[(u, p)] = "short distill"
|
|
try:
|
|
result = distill_short(u, p)
|
|
_record_run(u, p, "short")
|
|
return {"ok": True, **result}
|
|
finally:
|
|
_LOCKS_META.pop((u, p), None)
|
|
|
|
|
|
@router.post("/mid")
|
|
async def do_distill_mid(
|
|
user: str = Query(...),
|
|
persona: str = Query(...),
|
|
) -> dict:
|
|
u, p = _resolve(user, persona)
|
|
_check_cooldown(u, p, "mid")
|
|
lock = _check_lock(u, p)
|
|
async with lock:
|
|
_LOCKS_META[(u, p)] = "mid distill"
|
|
try:
|
|
result = await distill_mid(u, p)
|
|
if "error" not in result:
|
|
_record_run(u, p, "mid")
|
|
return {"ok": "error" not in result, **result}
|
|
finally:
|
|
_LOCKS_META.pop((u, p), None)
|
|
|
|
|
|
@router.post("/long")
|
|
async def do_distill_long(
|
|
user: str = Query(...),
|
|
persona: str = Query(...),
|
|
) -> dict:
|
|
u, p = _resolve(user, persona)
|
|
_check_cooldown(u, p, "long")
|
|
lock = _check_lock(u, p)
|
|
async with lock:
|
|
_LOCKS_META[(u, p)] = "long distill"
|
|
try:
|
|
result = await distill_long(u, p)
|
|
if "error" not in result:
|
|
_record_run(u, p, "long")
|
|
return {"ok": "error" not in result, **result}
|
|
finally:
|
|
_LOCKS_META.pop((u, p), None)
|
|
|
|
|
|
@router.post("/all")
|
|
async def do_distill_all(
|
|
user: str = Query(...),
|
|
persona: str = Query(...),
|
|
) -> dict:
|
|
u, p = _resolve(user, persona)
|
|
_check_cooldown(u, p, "all")
|
|
lock = _check_lock(u, p)
|
|
async with lock:
|
|
_LOCKS_META[(u, p)] = "full distill"
|
|
try:
|
|
short_result = distill_short(u, p)
|
|
mid_result = await distill_mid(u, p)
|
|
if "error" in mid_result:
|
|
return {"ok": False, "short": short_result, "mid": mid_result}
|
|
long_result = await distill_long(u, p)
|
|
ok = "error" not in long_result
|
|
if ok:
|
|
_record_run(u, p, "all")
|
|
return {
|
|
"ok": ok,
|
|
"short": short_result,
|
|
"mid": mid_result,
|
|
"long": long_result,
|
|
}
|
|
finally:
|
|
_LOCKS_META.pop((u, p), None)
|
|
|
|
|
|
@router.post("/rebuild")
|
|
async def do_distill_rebuild(
|
|
user: str = Query(...),
|
|
persona: str = Query(...),
|
|
) -> dict: # noqa: E501
|
|
"""Wipe MEMORY_MID and MEMORY_LONG (with backups), then run short → mid → long.
|
|
|
|
Use when memories have drifted, been corrupted, or you want a clean slate
|
|
rebuilt purely from session logs. Hand-edited content will be replaced.
|
|
"""
|
|
u, p = _resolve(user, persona)
|
|
_check_cooldown(u, p, "rebuild")
|
|
lock = _check_lock(u, p)
|
|
async with lock:
|
|
_LOCKS_META[(u, p)] = "memory rebuild"
|
|
try:
|
|
from memory_distiller import _rotate_backup, _read
|
|
inara_dir = _persona_path(u, p)
|
|
|
|
# Back up then wipe mid and long before rebuilding
|
|
for name in ("MEMORY_MID.md", "MEMORY_LONG.md"):
|
|
path = inara_dir / name
|
|
if path.exists():
|
|
_rotate_backup(path)
|
|
path.write_text(
|
|
f"# {name}\n\n*Cleared for rebuild — {__import__('datetime').datetime.now().strftime('%Y-%m-%d %H:%M')}.*\n"
|
|
)
|
|
|
|
short_result = distill_short(u, p)
|
|
mid_result = await distill_mid(u, p)
|
|
if "error" in mid_result:
|
|
return {"ok": False, "short": short_result, "mid": mid_result, "rebuilt": True}
|
|
long_result = await distill_long(u, p)
|
|
ok = "error" not in long_result
|
|
if ok:
|
|
_record_run(u, p, "rebuild")
|
|
return {
|
|
"ok": ok,
|
|
"short": short_result,
|
|
"mid": mid_result,
|
|
"long": long_result,
|
|
"rebuilt": True,
|
|
}
|
|
finally:
|
|
_LOCKS_META.pop((u, p), None)
|