- config.py: memory_budget_long/mid/short settings (overridable in .env)
- memory_distiller.py: distill_short (no LLM), distill_mid, distill_long (LLM)
- routers/distill.py: POST /distill/{short,mid,long,all} endpoints
- context_loader.py: rewritten to load memory in long→mid→short order with include_* toggles
- routers/chat.py: ChatRequest gains include_long/mid/short fields
- routers/files.py: MEMORY_LONG/MID/SHORT.md added to ALLOWED set
- main.py: register distill router
- static/index.html: context bar — tier selector, L/M/S memory toggles,
distill buttons with status feedback; send includes tier + memory flags
- inara/MEMORY_LONG.md: migrated from MEMORY.md + Cortex/Talk bot notes
- inara/MEMORY_MID.md, MEMORY_SHORT.md: stubs ready for distillation
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
90 lines
3.6 KiB
Python
90 lines
3.6 KiB
Python
from pathlib import Path
|
|
from config import settings
|
|
|
|
|
|
# Core identity files — always loaded regardless of tier.
# Kept as a tuple so this shared module-level constant cannot be mutated
# by accident; it is only ever iterated.
_CORE = ("SOUL.md", "IDENTITY.md")

# Number of leading USER.md lines included at Tier 1
# (identity + what he cares about).
_TIER_1_USER_LINES = 30
|
|
|
|
|
|
# Distilled-memory files at or below this size (in bytes) are treated as stubs.
_STUB_MAX_BYTES = 100
# Placeholder text marking a memory file that has not been distilled yet.
_STUB_MARKER = "Not yet populated"


def _read_if_present(path):
    """Return the UTF-8 text of *path*, or ``None`` if the file is missing."""
    # Explicit encoding: the default is the platform locale, which is not
    # UTF-8 everywhere. NOTE(review): assumes the files are written as UTF-8
    # — confirm the writers use the same encoding.
    try:
        return path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return None


def _section(label, body):
    """Format one context section under a ``--- label ---`` header."""
    return f"--- {label} ---\n{body}"


def _read_distilled(path):
    """Return distilled-memory text, or ``None`` for a missing or stub file."""
    try:
        if path.stat().st_size <= _STUB_MAX_BYTES:
            return None
    except FileNotFoundError:
        return None
    text = _read_if_present(path)
    if text is None or _STUB_MARKER in text:
        return None
    return text


def load_context(
    tier: int = 2,
    include_long: bool = True,
    include_mid: bool = True,
    include_short: bool = True,
) -> str:
    """
    Build the system-prompt context block for a given tier and memory toggles.

    Load order (long → mid → short) keeps the most recent memory closest
    to the conversation turn, which improves LLM recall.

    Tier 1 — SOUL + IDENTITY + USER summary            (~1,500 tokens)
    Tier 2 — + USER full + PROTOCOLS + memory          (~5,000 tokens)
    Tier 3 — + last 2 raw session logs                 (~15,000 tokens)
    Tier 4 — + last 7 raw session logs                 (~50,000 tokens)

    Args:
        tier: Context depth. Values below 2 behave like tier 1; values
            above 3 behave like tier 4.
        include_long: Include MEMORY_LONG.md (or legacy MEMORY.md). Tier 2+.
        include_mid: Include MEMORY_MID.md unless it is a stub. Tier 2+.
        include_short: Include MEMORY_SHORT.md unless it is a stub. Tier 2+.

    Returns:
        The sections that exist, each under a ``--- name ---`` header,
        joined by blank lines. Missing files are skipped silently.
    """
    inara_dir = settings.inara_path()
    parts = []

    # ── 1. Core identity (always) ──────────────────────────────────
    for filename in _CORE:
        text = _read_if_present(inara_dir / filename)
        if text is not None:
            parts.append(_section(filename, text))

    # ── 2. USER.md ─────────────────────────────────────────────────
    user_text = _read_if_present(inara_dir / "USER.md")
    if user_text is not None:
        # Same condition as the early return below, so an out-of-range
        # tier (<= 0) never receives more of USER.md than tier 1 does.
        if tier < 2:
            user_text = "\n".join(user_text.splitlines()[:_TIER_1_USER_LINES])
        parts.append(_section("USER.md", user_text))

    if tier < 2:
        return "\n\n".join(parts)

    # ── 3. Protocols (tier 2+) ─────────────────────────────────────
    protocols = _read_if_present(inara_dir / "PROTOCOLS.md")
    if protocols is not None:
        parts.append(_section("PROTOCOLS.md", protocols))

    # ── 4. Tiered memory — long → mid → short ─────────────────────
    # Short is last so it sits closest to the conversation turn.
    if include_long:
        # Fall back to legacy MEMORY.md during/after migration.
        for name in ("MEMORY_LONG.md", "MEMORY.md"):
            text = _read_if_present(inara_dir / name)
            if text is not None:
                parts.append(_section(name, text))
                break

    for enabled, name in (
        (include_mid, "MEMORY_MID.md"),
        (include_short, "MEMORY_SHORT.md"),
    ):
        if not enabled:
            continue
        text = _read_distilled(inara_dir / name)
        if text is not None:
            parts.append(_section(name, text))

    # ── 5. Raw session logs (tier 3+) ──────────────────────────────
    if tier >= 3:
        sessions_dir = inara_dir / "sessions"
        if sessions_dir.exists():
            count = 2 if tier == 3 else 7
            # Reverse path sort, then keep the first `count` files.
            # NOTE(review): presumably file names sort chronologically, so
            # these are the newest logs, emitted newest-first — confirm.
            recent = sorted(sessions_dir.glob("*.md"), reverse=True)[:count]
            for session_file in recent:
                parts.append(
                    _section(
                        f"Session: {session_file.name}",
                        session_file.read_text(encoding="utf-8"),
                    )
                )

    return "\n\n".join(parts)
|