Add tiered memory system with manual distillation
- config.py: memory_budget_long/mid/short settings (overridable in .env)
- memory_distiller.py: distill_short (no LLM), distill_mid, distill_long (LLM)
- routers/distill.py: POST /distill/{short,mid,long,all} endpoints
- context_loader.py: rewrote to load long→mid→short order with include_* toggles
- routers/chat.py: ChatRequest gains include_long/mid/short fields
- routers/files.py: MEMORY_LONG/MID/SHORT.md added to ALLOWED set
- main.py: register distill router
- static/index.html: context bar — tier selector, L/M/S memory toggles,
distill buttons with status feedback; send includes tier + memory flags
- inara/MEMORY_LONG.md: migrated from MEMORY.md + Cortex/Talk bot notes
- inara/MEMORY_MID.md, MEMORY_SHORT.md: stubs ready for distillation
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
170
cortex/memory_distiller.py
Normal file
170
cortex/memory_distiller.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Inara tiered memory distillation.

distill_short() — roll recent session logs → MEMORY_SHORT.md (no LLM)
distill_mid()   — summarize MEMORY_SHORT → MEMORY_MID.md (LLM)
distill_long()  — integrate MEMORY_MID → MEMORY_LONG.md (LLM)
|
||||
"""
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Rough chars-per-token estimate for budget enforcement. This is a heuristic
# used to turn token budgets into character budgets, not a tokenizer count.
_CHARS_PER_TOKEN = 4


def _budget_chars(tokens: int) -> int:
    """Convert a token budget into an approximate character budget.

    Args:
        tokens: Budget expressed in tokens.

    Returns:
        ``tokens`` multiplied by ``_CHARS_PER_TOKEN``.
    """
    return tokens * _CHARS_PER_TOKEN
|
||||
|
||||
|
||||
def _read(path: Path) -> str:
    """Return the text of ``path``, or an empty string if it does not exist.

    Args:
        path: File to read.

    Returns:
        The file contents as returned by ``Path.read_text()``, or ``""``
        when ``path.exists()`` is false.
    """
    # NOTE(review): read_text() is called without an explicit encoding, so
    # decoding follows the interpreter's default — confirm that matches how
    # the memory files are written.
    if not path.exists():
        return ""
    return path.read_text()
|
||||
|
||||
|
||||
def distill_short() -> dict:
    """Roll the most recent session log files into MEMORY_SHORT.md.

    No LLM involved — pure aggregation under a character budget derived from
    ``settings.memory_budget_short``. Files under ``<inara>/sessions/*.md``
    are taken in reverse-sorted path order until the budget is reached, then
    written out in the opposite order. The first file is always included,
    even if it alone exceeds the budget; file contents are never truncated.

    Returns:
        A dict with ``files_included`` (number of session files used),
        ``chars_written`` (length of the text written, header included) and
        ``budget_chars`` (the character budget that was applied).
    """
    inara_dir = settings.inara_path()
    sessions_dir = inara_dir / "sessions"
    budget = _budget_chars(settings.memory_budget_short)

    # Reverse-sorted by path. Presumably session filenames are date-stamped,
    # which would make this newest-first — TODO confirm naming scheme.
    session_files = (
        sorted(sessions_dir.glob("*.md"), reverse=True)
        if sessions_dir.exists()
        else []
    )

    parts = []
    total_chars = 0
    for sf in session_files:
        content = sf.read_text()
        if total_chars + len(content) > budget and parts:
            break  # always include at least one file
        parts.append((sf.name, content))
        total_chars += len(content)
        if total_chars >= budget:
            # Budget already met: stop before reading another file.
            break

    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    header = (
        "# MEMORY_SHORT.md — Recent Session Digest\n\n"
        f"*Auto-generated: {now}. {len(parts)} session file(s).*\n\n---\n\n"
    )
    # Write in chronological order (oldest first): undo the reversed scan.
    body = "\n\n".join(
        f"--- {name} ---\n{content}" for name, content in reversed(parts)
    )
    digest = header + body

    out_path = inara_dir / "MEMORY_SHORT.md"
    out_path.write_text(digest)
    logger.info(
        "distill_short: wrote %d chars from %d files", len(digest), len(parts)
    )

    return {
        "files_included": len(parts),
        "chars_written": len(digest),
        "budget_chars": budget,
    }
|
||||
|
||||
|
||||
async def distill_mid() -> dict:
    """Ask the LLM to summarize MEMORY_SHORT.md into MEMORY_MID.md.

    The token budget from ``settings.memory_budget_mid`` is only stated in
    the system prompt; the length of the model's reply is not checked here.

    Returns:
        On success, a dict with ``backend``, ``chars_written`` and
        ``budget_tokens``. If MEMORY_SHORT.md is missing, blank, or contains
        the text "Not yet populated", or if the LLM reply is blank, a dict
        with an ``error`` message instead; MEMORY_MID.md is then left
        untouched.
    """
    # Imported at call time, as in the original code (presumably to avoid an
    # import cycle or a hard dependency at module load — TODO confirm).
    from llm_client import complete

    inara_dir = settings.inara_path()
    short_content = _read(inara_dir / "MEMORY_SHORT.md")

    if not short_content.strip() or "Not yet populated" in short_content:
        return {"error": "MEMORY_SHORT.md is empty — run distill/short first"}

    budget_tokens = settings.memory_budget_mid
    system_prompt = (
        "You are Inara's memory distillation system. "
        "Summarize the following recent session logs into a concise mid-term memory digest. "
        f"Target length: under {budget_tokens} tokens. "
        "Focus on: recurring themes, important decisions made, ongoing projects, "
        "Scott's current state and priorities, and anything that should persist into future sessions. "
        "Write in first person as Inara (e.g. 'Scott and I worked on...'). "
        "Use markdown headings. Be specific and concrete — no filler."
    )

    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": short_content}],
    )

    # A blank reply would replace the existing digest with a header-only
    # file, so refuse to write and report it instead.
    if not (response_text or "").strip():
        logger.warning(
            "distill_mid: empty response via %s; MEMORY_MID.md not overwritten",
            backend,
        )
        return {
            "error": "LLM returned an empty response — MEMORY_MID.md left unchanged",
            "backend": backend,
        }

    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    header = (
        "# MEMORY_MID.md — Mid-Term Memory Digest\n\n"
        f"*Auto-distilled: {now} via {backend}.*\n\n---\n\n"
    )
    digest = header + response_text

    out_path = inara_dir / "MEMORY_MID.md"
    out_path.write_text(digest)
    logger.info("distill_mid: wrote %d chars via %s", len(digest), backend)

    return {
        "backend": backend,
        "chars_written": len(digest),
        "budget_tokens": budget_tokens,
    }
|
||||
|
||||
|
||||
async def distill_long() -> dict:
    """Ask the LLM to integrate MEMORY_MID.md into MEMORY_LONG.md.

    The current long-term memory and the mid-term digest are sent together;
    the model's reply replaces MEMORY_LONG.md wholesale. The token budget
    from ``settings.memory_budget_long`` is only stated in the system
    prompt; the reply length is not checked here.

    Returns:
        On success, a dict with ``backend``, ``chars_written`` and
        ``budget_tokens``. If MEMORY_MID.md is missing, blank, or contains
        the text "Not yet populated", or if the LLM reply is blank, a dict
        with an ``error`` message instead; MEMORY_LONG.md is then left
        untouched.
    """
    # Imported at call time, as in the original code (presumably to avoid an
    # import cycle or a hard dependency at module load — TODO confirm).
    from llm_client import complete

    inara_dir = settings.inara_path()
    long_content = _read(inara_dir / "MEMORY_LONG.md")
    mid_content = _read(inara_dir / "MEMORY_MID.md")

    if not mid_content.strip() or "Not yet populated" in mid_content:
        return {"error": "MEMORY_MID.md is empty — run distill/mid first"}

    budget_tokens = settings.memory_budget_long
    system_prompt = (
        "You are Inara's long-term memory curator. "
        "You will receive the current long-term memory and a recent mid-term digest. "
        f"Integrate the new information into the long-term memory. Target: under {budget_tokens} tokens. "
        "Rules: preserve important historical facts; update or replace stale information; "
        "absorb recurring themes from the mid-term digest; remove things no longer relevant. "
        "Return ONLY the updated MEMORY_LONG.md content in markdown. No preamble or commentary."
    )

    user_content = (
        f"## Current MEMORY_LONG.md\n\n{long_content}\n\n"
        f"## Recent MEMORY_MID.md to integrate\n\n{mid_content}"
    )

    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": user_content}],
    )

    # The reply overwrites the whole file, so a blank reply would erase the
    # accumulated long-term memory. Refuse to write and report it instead.
    if not (response_text or "").strip():
        logger.warning(
            "distill_long: empty response via %s; MEMORY_LONG.md not overwritten",
            backend,
        )
        return {
            "error": "LLM returned an empty response — MEMORY_LONG.md left unchanged",
            "backend": backend,
        }

    # Ensure the file has the right header if the LLM dropped it
    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    if not response_text.lstrip().startswith("# MEMORY_LONG"):
        response_text = (
            "# MEMORY_LONG.md — Inara Long-Term Memory\n\n"
            f"*Last distilled: {now} via {backend}.*\n\n---\n\n"
            + response_text
        )

    out_path = inara_dir / "MEMORY_LONG.md"
    out_path.write_text(response_text)
    logger.info("distill_long: wrote %d chars via %s", len(response_text), backend)

    return {
        "backend": backend,
        "chars_written": len(response_text),
        "budget_tokens": budget_tokens,
    }
|
||||
Reference in New Issue
Block a user