Files
Cortex-Inara/cortex/context_loader.py
Scott Idem ce3c1f5f7f Add tiered memory system with manual distillation
- config.py: memory_budget_long/mid/short settings (overridable in .env)
- memory_distiller.py: distill_short (no LLM), distill_mid, distill_long (LLM)
- routers/distill.py: POST /distill/{short,mid,long,all} endpoints
- context_loader.py: rewrote to load long→mid→short order with include_* toggles
- routers/chat.py: ChatRequest gains include_long/mid/short fields
- routers/files.py: MEMORY_LONG/MID/SHORT.md added to ALLOWED set
- main.py: register distill router
- static/index.html: context bar — tier selector, L/M/S memory toggles,
  distill buttons with status feedback; send includes tier + memory flags
- inara/MEMORY_LONG.md: migrated from MEMORY.md + Cortex/Talk bot notes
- inara/MEMORY_MID.md, MEMORY_SHORT.md: stubs ready for distillation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 21:22:32 -04:00

90 lines
3.6 KiB
Python

from pathlib import Path
from config import settings
# Core identity files — loaded first by load_context() for every tier,
# before any tier or memory-toggle check is applied. Files that do not
# exist under the Inara directory are skipped.
_CORE = ["SOUL.md", "IDENTITY.md"]
# Number of leading lines of USER.md kept when building Tier 1 context
# (identity + what he cares about); higher tiers include the whole file.
_TIER_1_USER_LINES = 30
# Memory files at or below this on-disk size (bytes) are treated as empty stubs.
_STUB_MAX_BYTES = 100
# Placeholder text whose presence marks a memory file as not yet populated.
_STUB_MARKER = "Not yet populated"
# Exceptions that mean "this path is not there" (missing file or missing parent).
_MISSING_ERRORS = (FileNotFoundError, NotADirectoryError)


def _read_text(path: Path):
    """Return the UTF-8 text of *path*, or None when the file does not exist.

    Reading directly (instead of ``exists()`` then ``read_text()``) avoids a
    race with anything that rewrites or removes the file between the two calls.
    The encoding is pinned so the result does not depend on the host locale.
    """
    try:
        return path.read_text(encoding="utf-8")
    except _MISSING_ERRORS:
        return None


def _read_populated_memory(path: Path):
    """Return the text of a memory file, or None when it is missing or a stub.

    A file counts as a stub when its size is at most ``_STUB_MAX_BYTES`` or
    its content contains ``_STUB_MARKER``.
    """
    try:
        if path.stat().st_size <= _STUB_MAX_BYTES:
            return None
    except _MISSING_ERRORS:
        return None
    content = _read_text(path)
    if content is None or _STUB_MARKER in content:
        return None
    return content


def _section(title: str, body: str) -> str:
    """Format one context section as a ``--- title ---`` header plus body."""
    return f"--- {title} ---\n{body}"


def load_context(
    tier: int = 2,
    include_long: bool = True,
    include_mid: bool = True,
    include_short: bool = True,
) -> str:
    """
    Build the system-prompt context block for a given tier and memory toggles.

    Load order (long → mid → short) keeps the most recent memory closest
    to the conversation turn, which improves LLM recall.

    Tier 1 — SOUL + IDENTITY + USER summary          (~1,500 tokens)
    Tier 2 — + USER full + PROTOCOLS + memory        (~5,000 tokens)
    Tier 3 — + last 2 raw session logs               (~15,000 tokens)
    Tier 4 — + last 7 raw session logs               (~50,000 tokens)

    Tiers below 1 are treated like Tier 1; tiers above 4 behave like Tier 4.

    Args:
        tier: Context size level, as described above.
        include_long: Include MEMORY_LONG.md (or legacy MEMORY.md) at tier 2+.
        include_mid: Include MEMORY_MID.md at tier 2+ unless it is a stub.
        include_short: Include MEMORY_SHORT.md at tier 2+ unless it is a stub.

    Returns:
        The sections that were found, each formatted as ``--- NAME ---``
        followed by the file text, joined by blank lines. Missing files are
        skipped silently; an empty string is returned if nothing was found.

    Raises:
        UnicodeDecodeError: If a context file is not valid UTF-8.
    """
    inara_dir = settings.inara_path()
    parts = []

    # ── 1. Core identity (always) ──────────────────────────────────
    for filename in _CORE:
        text = _read_text(inara_dir / filename)
        if text is not None:
            parts.append(_section(filename, text))

    # ── 2. USER.md ─────────────────────────────────────────────────
    user_text = _read_text(inara_dir / "USER.md")
    if user_text is not None:
        # Use the same threshold as the early return below so that a tier
        # below 1 never yields more context than Tier 1 itself.
        if tier <= 1:
            user_text = "\n".join(user_text.splitlines()[:_TIER_1_USER_LINES])
        parts.append(_section("USER.md", user_text))

    if tier < 2:
        return "\n\n".join(parts)

    # ── 3. Protocols (tier 2+) ─────────────────────────────────────
    proto_text = _read_text(inara_dir / "PROTOCOLS.md")
    if proto_text is not None:
        parts.append(_section("PROTOCOLS.md", proto_text))

    # ── 4. Tiered memory — long → mid → short ─────────────────────
    # Short is last so it sits closest to the conversation turn.
    if include_long:
        # Fall back to legacy MEMORY.md during/after migration.
        for filename in ("MEMORY_LONG.md", "MEMORY.md"):
            text = _read_text(inara_dir / filename)
            if text is not None:
                parts.append(_section(filename, text))
                break

    memory_tiers = (
        (include_mid, "MEMORY_MID.md"),
        (include_short, "MEMORY_SHORT.md"),
    )
    for enabled, filename in memory_tiers:
        if not enabled:
            continue
        text = _read_populated_memory(inara_dir / filename)
        if text is not None:
            parts.append(_section(filename, text))

    # ── 5. Raw session logs (tier 3+) ──────────────────────────────
    if tier >= 3:
        sessions_dir = inara_dir / "sessions"
        if sessions_dir.is_dir():
            count = 2 if tier == 3 else 7
            # Reverse path sort puts the latest log first; this presumes the
            # filenames sort chronologically (e.g. date-prefixed) — TODO confirm.
            # NOTE(review): logs are emitted newest-first, so the oldest
            # selected log ends up closest to the conversation turn — confirm
            # this is intended given the ordering goal stated in the docstring.
            candidates = (p for p in sessions_dir.glob("*.md") if p.is_file())
            for sf in sorted(candidates, reverse=True)[:count]:
                text = _read_text(sf)
                if text is not None:  # file may vanish between glob and read
                    parts.append(_section(f"Session: {sf.name}", text))

    return "\n\n".join(parts)