Add tiered memory system with manual distillation
- config.py: memory_budget_long/mid/short settings (overridable in .env)
- memory_distiller.py: distill_short (no LLM), distill_mid, distill_long (LLM)
- routers/distill.py: POST /distill/{short,mid,long,all} endpoints
- context_loader.py: rewritten to load memory in long→mid→short order, with include_* toggles
- routers/chat.py: ChatRequest gains include_long/mid/short fields
- routers/files.py: MEMORY_LONG/MID/SHORT.md added to ALLOWED set
- main.py: register distill router
- static/index.html: context bar — tier selector, L/M/S memory toggles,
distill buttons with status feedback; send includes tier + memory flags
- inara/MEMORY_LONG.md: migrated from MEMORY.md + Cortex/Talk bot notes
- inara/MEMORY_MID.md, MEMORY_SHORT.md: stubs ready for distillation
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,46 +2,83 @@ from pathlib import Path
|
||||
from config import settings
|
||||
|
||||
|
||||
# Files loaded per tier — mirrors CONTEXT_TIERS.md
|
||||
TIER_FILES: dict[int, list[str]] = {
|
||||
1: ["SOUL.md", "IDENTITY.md"], # + USER.md summary only
|
||||
2: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"],
|
||||
3: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"],
|
||||
4: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"],
|
||||
}
|
||||
# Core identity files — always loaded regardless of tier
|
||||
_CORE = ["SOUL.md", "IDENTITY.md"]
|
||||
|
||||
# Lines of USER.md to include at Tier 1 (just identity + what he cares about)
|
||||
TIER_1_USER_LINES = 30
|
||||
# Lines of USER.md to include at Tier 1 (identity + what he cares about)
|
||||
_TIER_1_USER_LINES = 30
|
||||
|
||||
|
||||
def _read(path: Path) -> str:
    """Return the text of *path*, or a ``[missing: <name>]`` placeholder.

    Args:
        path: File to read.

    Returns:
        The file's contents decoded as UTF-8, or the string
        ``"[missing: <file name>]"`` when the file does not exist.
    """
    # Read first and handle the failure, rather than checking exists() and
    # then reading: the file can disappear between the two calls.
    try:
        # Explicit UTF-8 so non-ASCII Markdown decodes the same way on every
        # platform instead of depending on the locale's default encoding.
        return path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return f"[missing: {path.name}]"
|
||||
def load_context(
|
||||
tier: int = 2,
|
||||
include_long: bool = True,
|
||||
include_mid: bool = True,
|
||||
include_short: bool = True,
|
||||
) -> str:
|
||||
"""
|
||||
Build the system-prompt context block for a given tier and memory toggles.
|
||||
|
||||
Load order (long → mid → short) keeps the most recent memory closest
|
||||
to the conversation turn, which improves LLM recall.
|
||||
|
||||
def load_context(tier: int = 2) -> str:
|
||||
Tier 1 — SOUL + IDENTITY + USER summary (~1,500 tokens)
|
||||
Tier 2 — + USER full + PROTOCOLS + memory (~5,000 tokens)
|
||||
Tier 3 — + last 2 raw session logs (~15,000 tokens)
|
||||
Tier 4 — + last 7 raw session logs (~50,000 tokens)
|
||||
"""
|
||||
inara_dir = settings.inara_path()
|
||||
parts = []
|
||||
|
||||
files = TIER_FILES.get(tier, TIER_FILES[2])
|
||||
|
||||
for filename in files:
|
||||
# ── 1. Core identity (always) ──────────────────────────────────
|
||||
for filename in _CORE:
|
||||
path = inara_dir / filename
|
||||
if not path.exists():
|
||||
continue
|
||||
if path.exists():
|
||||
parts.append(f"--- {filename} ---\n{path.read_text()}")
|
||||
|
||||
if filename == "USER.md" and tier == 1:
|
||||
# Tier 1: include only the first N lines
|
||||
lines = path.read_text().splitlines()[:TIER_1_USER_LINES]
|
||||
# ── 2. USER.md ─────────────────────────────────────────────────
|
||||
user_path = inara_dir / "USER.md"
|
||||
if user_path.exists():
|
||||
if tier == 1:
|
||||
lines = user_path.read_text().splitlines()[:_TIER_1_USER_LINES]
|
||||
content = "\n".join(lines)
|
||||
else:
|
||||
content = path.read_text()
|
||||
content = user_path.read_text()
|
||||
parts.append(f"--- USER.md ---\n{content}")
|
||||
|
||||
parts.append(f"--- {filename} ---\n{content}")
|
||||
if tier < 2:
|
||||
return "\n\n".join(parts)
|
||||
|
||||
# ── 3. Protocols (tier 2+) ─────────────────────────────────────
|
||||
proto_path = inara_dir / "PROTOCOLS.md"
|
||||
if proto_path.exists():
|
||||
parts.append(f"--- PROTOCOLS.md ---\n{proto_path.read_text()}")
|
||||
|
||||
# ── 4. Tiered memory — long → mid → short ─────────────────────
|
||||
# Short is last so it sits closest to the conversation turn.
|
||||
if include_long:
|
||||
# Fall back to legacy MEMORY.md during/after migration
|
||||
long_path = inara_dir / "MEMORY_LONG.md"
|
||||
if not long_path.exists():
|
||||
long_path = inara_dir / "MEMORY.md"
|
||||
if long_path.exists():
|
||||
parts.append(f"--- {long_path.name} ---\n{long_path.read_text()}")
|
||||
|
||||
if include_mid:
|
||||
mid_path = inara_dir / "MEMORY_MID.md"
|
||||
if mid_path.exists() and mid_path.stat().st_size > 100:
|
||||
content = mid_path.read_text()
|
||||
if "Not yet populated" not in content:
|
||||
parts.append(f"--- MEMORY_MID.md ---\n{content}")
|
||||
|
||||
if include_short:
|
||||
short_path = inara_dir / "MEMORY_SHORT.md"
|
||||
if short_path.exists() and short_path.stat().st_size > 100:
|
||||
content = short_path.read_text()
|
||||
if "Not yet populated" not in content:
|
||||
parts.append(f"--- MEMORY_SHORT.md ---\n{content}")
|
||||
|
||||
# ── 5. Raw session logs (tier 3+) ──────────────────────────────
|
||||
if tier >= 3:
|
||||
# Add recent session logs
|
||||
sessions_dir = inara_dir / "sessions"
|
||||
if sessions_dir.exists():
|
||||
count = 2 if tier == 3 else 7
|
||||
|
||||
Reference in New Issue
Block a user