Add tiered memory system with manual distillation
- config.py: memory_budget_long/mid/short settings (overridable in .env)
- memory_distiller.py: distill_short (no LLM); distill_mid and distill_long (both LLM)
- routers/distill.py: POST /distill/{short,mid,long,all} endpoints
- context_loader.py: rewritten to load memory in long→mid→short order, with include_* toggles
- routers/chat.py: ChatRequest gains include_long/mid/short fields
- routers/files.py: MEMORY_LONG/MID/SHORT.md added to ALLOWED set
- main.py: register distill router
- static/index.html: context bar — tier selector, L/M/S memory toggles,
distill buttons with status feedback; send includes tier + memory flags
- inara/MEMORY_LONG.md: migrated from MEMORY.md + Cortex/Talk bot notes
- inara/MEMORY_MID.md, MEMORY_SHORT.md: stubs ready for distillation
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,9 @@ class ChatRequest(BaseModel):
|
||||
session_id: str | None = None
|
||||
tier: int | None = None
|
||||
model: str | None = None # "claude" or "gemini" to override; None = use primary_backend
|
||||
include_long: bool = True
|
||||
include_mid: bool = True
|
||||
include_short: bool = True
|
||||
|
||||
|
||||
class BackendRequest(BaseModel):
|
||||
@@ -49,7 +52,12 @@ async def _stream_chat(req: ChatRequest):
|
||||
session_id = req.session_id or generate_session_id()
|
||||
tier = req.tier or settings.default_tier
|
||||
|
||||
system_prompt = load_context(tier)
|
||||
system_prompt = load_context(
|
||||
tier,
|
||||
include_long=req.include_long,
|
||||
include_mid=req.include_mid,
|
||||
include_short=req.include_short,
|
||||
)
|
||||
history = load_session(session_id)
|
||||
history.append({"role": "user", "content": req.message})
|
||||
|
||||
|
||||
44
cortex/routers/distill.py
Normal file
44
cortex/routers/distill.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Manual memory distillation endpoints.
|
||||
|
||||
POST /distill/short — roll session logs → MEMORY_SHORT.md (no LLM)
|
||||
POST /distill/mid — summarize short → MEMORY_MID.md (LLM)
|
||||
POST /distill/long — integrate mid → MEMORY_LONG.md (LLM)
|
||||
POST /distill/all — run all three in sequence
|
||||
"""
|
||||
from fastapi import APIRouter
|
||||
from memory_distiller import distill_short, distill_mid, distill_long
|
||||
|
||||
# Every route registered on this router is served under the /distill prefix.
router = APIRouter(prefix="/distill")
|
||||
|
||||
|
||||
@router.post("/short")
async def do_distill_short() -> dict:
    """Run the short-tier distillation and report its result.

    Returns:
        A dict that starts with ``"ok": True`` and is then extended with
        every key returned by ``distill_short()``.
    """
    payload = {"ok": True}
    # Keys from the distiller result are merged after "ok", so they take
    # precedence on any collision.
    payload.update(distill_short())
    return payload
|
||||
|
||||
|
||||
@router.post("/mid")
async def do_distill_mid() -> dict:
    """Run the mid-tier distillation and report its result.

    Returns:
        The mapping returned by ``distill_mid()`` merged after an ``"ok"``
        flag, which is True only when the result has no ``"error"`` key.
    """
    outcome = await distill_mid()
    payload = {"ok": "error" not in outcome}
    # Merge the distiller's own keys after the success flag.
    payload.update(outcome)
    return payload
|
||||
|
||||
|
||||
@router.post("/long")
async def do_distill_long() -> dict:
    """Run the long-tier distillation and report its result.

    Returns:
        The mapping returned by ``distill_long()`` merged after an ``"ok"``
        flag, which is True only when the result has no ``"error"`` key.
    """
    outcome = await distill_long()
    payload = {"ok": "error" not in outcome}
    # Merge the distiller's own keys after the success flag.
    payload.update(outcome)
    return payload
|
||||
|
||||
|
||||
@router.post("/all")
async def do_distill_all() -> dict:
    """Run the short, mid and long distillations in sequence.

    The short step always runs first. If the mid result contains an
    ``"error"`` key, the long step is skipped and the response carries
    ``"ok": False`` with only the ``"short"`` and ``"mid"`` results.

    Returns:
        A dict with ``"ok"``, ``"short"``, ``"mid"`` and, when the mid step
        succeeded, ``"long"``. On the full path ``"ok"`` is True only when
        the long result has no ``"error"`` key.
    """
    # "ok" starts out False and is only updated once the long step has run.
    report = {"ok": False, "short": distill_short()}

    report["mid"] = await distill_mid()
    if "error" in report["mid"]:
        # Stop early: the long step is not attempted after a failed mid step.
        return report

    report["long"] = await distill_long()
    report["ok"] = "error" not in report["long"]
    return report
|
||||
@@ -12,9 +12,12 @@ ALLOWED = {
|
||||
"SOUL.md",
|
||||
"IDENTITY.md",
|
||||
"USER.md",
|
||||
"MEMORY.md",
|
||||
"PROTOCOLS.md",
|
||||
"CONTEXT_TIERS.md",
|
||||
"MEMORY.md", # legacy — kept for reference
|
||||
"MEMORY_LONG.md",
|
||||
"MEMORY_MID.md",
|
||||
"MEMORY_SHORT.md",
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user