Add tiered memory system with manual distillation

- config.py: memory_budget_long/mid/short settings (overridable in .env)
- memory_distiller.py: distill_short (no LLM), distill_mid, distill_long (LLM)
- routers/distill.py: POST /distill/{short,mid,long,all} endpoints
- context_loader.py: rewrote to load long→mid→short order with include_* toggles
- routers/chat.py: ChatRequest gains include_long/mid/short fields
- routers/files.py: MEMORY_LONG/MID/SHORT.md added to ALLOWED set
- main.py: register distill router
- static/index.html: context bar — tier selector, L/M/S memory toggles, distill buttons with status feedback; send includes tier + memory flags
- inara/MEMORY_LONG.md: migrated from MEMORY.md + Cortex/Talk bot notes
- inara/MEMORY_MID.md, MEMORY_SHORT.md: stubs ready for distillation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 21:22:32 -04:00
parent 3455c7a09c
commit ce3c1f5f7f
11 changed files with 779 additions and 29 deletions
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -19,6 +19,9 @@ class ChatRequest(BaseModel):
    session_id: str | None = None
    tier: int | None = None
    model: str | None = None  # "claude" or "gemini" to override; None = use primary_backend
+    include_long: bool = True
+    include_mid: bool = True
+    include_short: bool = True


 class BackendRequest(BaseModel):
@@ -49,7 +52,12 @@ async def _stream_chat(req: ChatRequest):
    session_id = req.session_id or generate_session_id()
    tier = req.tier or settings.default_tier

-    system_prompt = load_context(tier)
+    system_prompt = load_context(
+        tier,
+        include_long=req.include_long,
+        include_mid=req.include_mid,
+        include_short=req.include_short,
+    )
    history = load_session(session_id)
    history.append({"role": "user", "content": req.message})

--- a/cortex/routers/distill.py
+++ b/cortex/routers/distill.py
@@ -0,0 +1,44 @@
+"""
+Manual memory distillation endpoints.
+
+  POST /distill/short  — roll session logs → MEMORY_SHORT.md (no LLM)
+  POST /distill/mid    — summarize short   → MEMORY_MID.md   (LLM)
+  POST /distill/long   — integrate mid     → MEMORY_LONG.md  (LLM)
+  POST /distill/all    — run all three in sequence
+"""
+from fastapi import APIRouter
+from memory_distiller import distill_short, distill_mid, distill_long
+
+router = APIRouter(prefix="/distill")
+
+
+@router.post("/short")
+async def do_distill_short() -> dict:
+    """Run the short-tier distillation and report its outcome.
+
+    Returns the mapping produced by ``distill_short()`` with an ``ok`` flag
+    placed first.  ``ok`` is derived the same way as in the /mid and /long
+    endpoints (False when the result carries an ``"error"`` key) rather than
+    being hard-coded to True, so a reported failure is never labelled as a
+    success.
+
+    NOTE(review): the return shape of ``distill_short`` is not visible from
+    this module; if it never emits an ``"error"`` key, ``ok`` is always True,
+    exactly as before.
+    """
+    # distill_short is called synchronously (not awaited), as in the original.
+    result = distill_short()
+    return {"ok": "error" not in result, **result}
+
+
+@router.post("/mid")
+async def do_distill_mid() -> dict:
+    """Await ``distill_mid()`` and wrap its result for the HTTP response.
+
+    The response is the distiller's result with an ``ok`` entry placed first;
+    ``ok`` is False exactly when the result contains an ``"error"`` key.
+    """
+    outcome = await distill_mid()
+    succeeded = "error" not in outcome
+    # "ok" comes first; the distiller's own keys are spread in after it.
+    return {"ok": succeeded, **outcome}
+
+
+@router.post("/long")
+async def do_distill_long() -> dict:
+    """Await ``distill_long()`` and wrap its result for the HTTP response.
+
+    The response is the distiller's result with an ``ok`` entry placed first;
+    ``ok`` is False exactly when the result contains an ``"error"`` key.
+    """
+    outcome = await distill_long()
+    succeeded = "error" not in outcome
+    # "ok" comes first; the distiller's own keys are spread in after it.
+    return {"ok": succeeded, **outcome}
+
+
+@router.post("/all")
+async def do_distill_all() -> dict:
+    """Run the short, mid and long distillation steps in that order.
+
+    The response always carries ``ok``, ``short`` and ``mid``.  If the mid
+    step's result contains an ``"error"`` key, the long step is not run and
+    the response is returned with ``ok`` False and no ``long`` entry.
+    Otherwise ``long`` is added and ``ok`` reflects whether the long result
+    is free of an ``"error"`` key.
+    """
+    # Start pessimistic; "ok" is only recomputed once the long step has run.
+    report = {"ok": False, "short": distill_short()}
+    report["mid"] = await distill_mid()
+    if "error" in report["mid"]:
+        # Mid failed: stop here, leaving "long" out of the response.
+        return report
+    report["long"] = await distill_long()
+    report["ok"] = "error" not in report["long"]
+    return report
--- a/cortex/routers/files.py
+++ b/cortex/routers/files.py
@@ -12,9 +12,12 @@ ALLOWED = {
    "SOUL.md",
    "IDENTITY.md",
    "USER.md",
-    "MEMORY.md",
    "PROTOCOLS.md",
    "CONTEXT_TIERS.md",
+    "MEMORY.md",        # legacy — kept for reference
+    "MEMORY_LONG.md",
+    "MEMORY_MID.md",
+    "MEMORY_SHORT.md",
 }