# Cortex-Inara/cortex/context_loader.py

from pathlib import Path
from config import settings


# Identity documents that go into every context block, whatever the tier.
_CORE = [
    "SOUL.md",
    "IDENTITY.md",
]

# Tier 1 only includes this many leading lines of USER.md
# (identity + what he cares about).
_TIER_1_USER_LINES = 30


def _read_utf8(path: Path) -> str:
    """Return the text of *path*, decoded as UTF-8 on every platform."""
    # An explicit encoding avoids mis-decoding (or UnicodeDecodeError) on
    # systems whose locale default is not UTF-8.
    return path.read_text(encoding="utf-8")


def _format_section(title: str, body: str) -> str:
    """Return *body* under the ``--- title ---`` header used for every section."""
    return f"--- {title} ---\n{body}"


def _populated_memory(path: Path) -> str:
    """Return the memory text in *path*, or ``""`` if there is nothing to load.

    A memory file is skipped when it is missing, is 100 bytes or smaller,
    or still contains the "Not yet populated" placeholder marker.
    """
    if not path.is_file() or path.stat().st_size <= 100:
        return ""
    content = _read_utf8(path)
    if "Not yet populated" in content:
        return ""
    return content


def load_context(
    tier: int = 2,
    include_long: bool = True,
    include_mid: bool = True,
    include_short: bool = True,
) -> str:
    """
    Build the system-prompt context block for a given tier and memory toggles.

    Load order (long → mid → short) keeps the most recent memory closest
    to the conversation turn, which improves LLM recall.

    Tier 1  — SOUL + IDENTITY + USER summary        (~1,500 tokens)
    Tier 2  — + USER full + PROTOCOLS + memory      (~5,000 tokens)
    Tier 3  — + last 2 raw session logs             (~15,000 tokens)
    Tier 4  — + last 7 raw session logs             (~50,000 tokens)

    Tiers below 1 are treated like Tier 1; tiers above 4 like Tier 4.

    Args:
        tier: Context size level, see the table above.
        include_long: Include MEMORY_LONG.md (or legacy MEMORY.md) at tier 2+.
        include_mid: Include MEMORY_MID.md at tier 2+ when it is populated.
        include_short: Include MEMORY_SHORT.md at tier 2+ when it is populated.

    Returns:
        The sections found on disk, each introduced by a ``--- name ---``
        header and separated by blank lines. Missing files are skipped
        silently, so the result may be an empty string.
    """
    inara_dir = settings.inara_path()
    parts = []

    # ── 1. Core identity (always) ──────────────────────────────────
    for filename in _CORE:
        path = inara_dir / filename
        # is_file() rather than exists(): a directory of that name must not
        # reach read_text().
        if path.is_file():
            parts.append(_format_section(filename, _read_utf8(path)))

    # ── 2. USER.md ─────────────────────────────────────────────────
    user_path = inara_dir / "USER.md"
    if user_path.is_file():
        content = _read_utf8(user_path)
        # `<= 1` (not `== 1`) so that tier 0 or below, which also returns
        # early as a minimal tier, does not carry the full USER.md.
        if tier <= 1:
            content = "\n".join(content.splitlines()[:_TIER_1_USER_LINES])
        parts.append(_format_section("USER.md", content))

    if tier < 2:
        return "\n\n".join(parts)

    # ── 3. Protocols (tier 2+) ─────────────────────────────────────
    proto_path = inara_dir / "PROTOCOLS.md"
    if proto_path.is_file():
        parts.append(_format_section("PROTOCOLS.md", _read_utf8(proto_path)))

    # ── 4. Tiered memory — long → mid → short ─────────────────────
    #    Short is last so it sits closest to the conversation turn.
    if include_long:
        # Fall back to legacy MEMORY.md during/after migration
        long_path = inara_dir / "MEMORY_LONG.md"
        if not long_path.is_file():
            long_path = inara_dir / "MEMORY.md"
        if long_path.is_file():
            parts.append(_format_section(long_path.name, _read_utf8(long_path)))

    for enabled, filename in (
        (include_mid, "MEMORY_MID.md"),
        (include_short, "MEMORY_SHORT.md"),
    ):
        if not enabled:
            continue
        content = _populated_memory(inara_dir / filename)
        if content:
            parts.append(_format_section(filename, content))

    # ── 5. Raw session logs (tier 3+) ──────────────────────────────
    if tier >= 3:
        sessions_dir = inara_dir / "sessions"
        if sessions_dir.is_dir():
            count = 2 if tier == 3 else 7
            # "Last N" is decided by reverse path order, so it presumably
            # relies on session filenames sorting chronologically
            # (e.g. date-prefixed) — TODO confirm.
            # NOTE(review): logs are emitted newest-first, so the most recent
            # session is the one farthest from the conversation turn; confirm
            # whether that is intended.
            session_files = sorted(
                (p for p in sessions_dir.glob("*.md") if p.is_file()),
                reverse=True,
            )[:count]
            for sf in session_files:
                parts.append(_format_section(f"Session: {sf.name}", _read_utf8(sf)))

    return "\n\n".join(parts)