diff --git a/cortex/config.py b/cortex/config.py index 63c0390..8ff8810 100644 --- a/cortex/config.py +++ b/cortex/config.py @@ -26,6 +26,12 @@ class Settings(BaseSettings): nextcloud_talk_bot_secret: str = "" # set in .env nextcloud_talk_timeout: int = 55 + # Memory tier token budgets — soft caps used during distillation + # Override in .env: MEMORY_BUDGET_LONG=4000 etc. + memory_budget_long: int = 2000 + memory_budget_mid: int = 2000 + memory_budget_short: int = 3000 + host: str = "0.0.0.0" port: int = 8000 diff --git a/cortex/context_loader.py b/cortex/context_loader.py index f6f46fb..67440ad 100644 --- a/cortex/context_loader.py +++ b/cortex/context_loader.py @@ -2,46 +2,83 @@ from pathlib import Path from config import settings -# Files loaded per tier — mirrors CONTEXT_TIERS.md -TIER_FILES: dict[int, list[str]] = { - 1: ["SOUL.md", "IDENTITY.md"], # + USER.md summary only - 2: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"], - 3: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"], - 4: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"], -} +# Core identity files — always loaded regardless of tier +_CORE = ["SOUL.md", "IDENTITY.md"] -# Lines of USER.md to include at Tier 1 (just identity + what he cares about) -TIER_1_USER_LINES = 30 +# Lines of USER.md to include at Tier 1 (identity + what he cares about) +_TIER_1_USER_LINES = 30 -def _read(path: Path) -> str: - if path.exists(): - return path.read_text() - return f"[missing: {path.name}]" +def load_context( + tier: int = 2, + include_long: bool = True, + include_mid: bool = True, + include_short: bool = True, +) -> str: + """ + Build the system-prompt context block for a given tier and memory toggles. + Load order (long → mid → short) keeps the most recent memory closest + to the conversation turn, which improves LLM recall. 
-def load_context(tier: int = 2) -> str: + Tier 1 — SOUL + IDENTITY + USER summary (~1,500 tokens) + Tier 2 — + USER full + PROTOCOLS + memory (~5,000 tokens) + Tier 3 — + last 2 raw session logs (~15,000 tokens) + Tier 4 — + last 7 raw session logs (~50,000 tokens) + """ inara_dir = settings.inara_path() parts = [] - files = TIER_FILES.get(tier, TIER_FILES[2]) - - for filename in files: + # ── 1. Core identity (always) ────────────────────────────────── + for filename in _CORE: path = inara_dir / filename - if not path.exists(): - continue + if path.exists(): + parts.append(f"--- {filename} ---\n{path.read_text()}") - if filename == "USER.md" and tier == 1: - # Tier 1: include only the first N lines - lines = path.read_text().splitlines()[:TIER_1_USER_LINES] + # ── 2. USER.md ───────────────────────────────────────────────── + user_path = inara_dir / "USER.md" + if user_path.exists(): + if tier == 1: + lines = user_path.read_text().splitlines()[:_TIER_1_USER_LINES] content = "\n".join(lines) else: - content = path.read_text() + content = user_path.read_text() + parts.append(f"--- USER.md ---\n{content}") - parts.append(f"--- {filename} ---\n{content}") + if tier < 2: + return "\n\n".join(parts) + # ── 3. Protocols (tier 2+) ───────────────────────────────────── + proto_path = inara_dir / "PROTOCOLS.md" + if proto_path.exists(): + parts.append(f"--- PROTOCOLS.md ---\n{proto_path.read_text()}") + + # ── 4. Tiered memory — long → mid → short ───────────────────── + # Short is last so it sits closest to the conversation turn. 
+ if include_long: + # Fall back to legacy MEMORY.md during/after migration + long_path = inara_dir / "MEMORY_LONG.md" + if not long_path.exists(): + long_path = inara_dir / "MEMORY.md" + if long_path.exists(): + parts.append(f"--- {long_path.name} ---\n{long_path.read_text()}") + + if include_mid: + mid_path = inara_dir / "MEMORY_MID.md" + if mid_path.exists() and mid_path.stat().st_size > 100: + content = mid_path.read_text() + if "Not yet populated" not in content: + parts.append(f"--- MEMORY_MID.md ---\n{content}") + + if include_short: + short_path = inara_dir / "MEMORY_SHORT.md" + if short_path.exists() and short_path.stat().st_size > 100: + content = short_path.read_text() + if "Not yet populated" not in content: + parts.append(f"--- MEMORY_SHORT.md ---\n{content}") + + # ── 5. Raw session logs (tier 3+) ────────────────────────────── if tier >= 3: - # Add recent session logs sessions_dir = inara_dir / "sessions" if sessions_dir.exists(): count = 2 if tier == 3 else 7 diff --git a/cortex/main.py b/cortex/main.py index 0685ac5..2175e3f 100644 --- a/cortex/main.py +++ b/cortex/main.py @@ -8,7 +8,7 @@ import uvicorn logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s: %(message)s") from config import settings -from routers import chat, google_chat, nextcloud_talk, files +from routers import chat, google_chat, nextcloud_talk, files, distill @asynccontextmanager @@ -24,6 +24,7 @@ app.include_router(chat.router) app.include_router(google_chat.router) app.include_router(nextcloud_talk.router) app.include_router(files.router) +app.include_router(distill.router) app.mount("/static", StaticFiles(directory="static"), name="static") diff --git a/cortex/memory_distiller.py b/cortex/memory_distiller.py new file mode 100644 index 0000000..fe94d75 --- /dev/null +++ b/cortex/memory_distiller.py @@ -0,0 +1,170 @@ +""" +Inara tiered memory distillation. 
"""
Inara tiered memory distillation.

  distill_short()  — roll recent session logs → MEMORY_SHORT.md  (no LLM)
  distill_mid()    — summarize MEMORY_SHORT   → MEMORY_MID.md    (LLM)
  distill_long()   — integrate MEMORY_MID     → MEMORY_LONG.md   (LLM)
"""
import logging
from datetime import datetime
from pathlib import Path
from config import settings

logger = logging.getLogger(__name__)

# Rough chars-per-token estimate for budget enforcement
_CHARS_PER_TOKEN = 4

# Memory files containing this marker are treated as empty by the distillers.
_PLACEHOLDER_MARKER = "Not yet populated"


def _budget_chars(tokens: int) -> int:
    """Convert a token budget into an approximate character budget."""
    return tokens * _CHARS_PER_TOKEN


def _read(path: Path) -> str:
    """Return the text of *path*, or an empty string when it does not exist."""
    return path.read_text() if path.exists() else ""


def _is_unpopulated(content: str) -> bool:
    """Return True when *content* is blank or still carries the placeholder marker."""
    return not content.strip() or _PLACEHOLDER_MARKER in content


def _timestamp() -> str:
    """Return the current local time formatted for memory-file headers."""
    return datetime.now().strftime("%Y-%m-%d %H:%M")


def distill_short() -> dict:
    """
    Roll the most recent session log files into MEMORY_SHORT.md.

    No LLM involved — pure aggregation under a character budget derived from
    ``settings.memory_budget_short``. Files are selected newest-first (by
    reverse-sorted file name) until the budget is reached, then written in
    chronological order (oldest first). The newest file is always included,
    even when it alone exceeds the budget.

    When no session logs exist, the file is still rewritten, but its body
    carries the "Not yet populated" marker so that readers which skip
    placeholder files do not mistake a header-only digest for real memory.

    Returns a dict with ``files_included``, ``chars_written`` and
    ``budget_chars``.
    """
    inara_dir = settings.inara_path()
    sessions_dir = inara_dir / "sessions"
    budget = _budget_chars(settings.memory_budget_short)

    session_files = (
        sorted(sessions_dir.glob("*.md"), reverse=True)
        if sessions_dir.exists()
        else []
    )

    parts: list[tuple[str, str]] = []
    total_chars = 0
    for sf in session_files:
        content = sf.read_text()
        if parts and total_chars + len(content) > budget:
            break  # always include at least one file
        parts.append((sf.name, content))
        total_chars += len(content)
        if total_chars >= budget:
            break

    now = _timestamp()
    header = (
        f"# MEMORY_SHORT.md — Recent Session Digest\n\n"
        f"*Auto-generated: {now}. {len(parts)} session file(s).*\n\n---\n\n"
    )
    if parts:
        # Write in chronological order (oldest first)
        body = "\n\n".join(
            f"--- {name} ---\n{content}" for name, content in reversed(parts)
        )
    else:
        # Nothing to aggregate: mark the digest as a placeholder so it is not
        # loaded into context or summarised as if it held real content.
        body = f"*{_PLACEHOLDER_MARKER} — no session logs found.*\n"

    digest = header + body
    out_path = inara_dir / "MEMORY_SHORT.md"
    out_path.write_text(digest)
    logger.info("distill_short: wrote %d chars from %d files", len(digest), len(parts))

    return {
        "files_included": len(parts),
        "chars_written": len(digest),
        "budget_chars": budget,
    }


async def distill_mid() -> dict:
    """
    Ask the LLM to summarize MEMORY_SHORT.md → MEMORY_MID.md.

    Returns ``{"error": ...}`` without touching MEMORY_MID.md when the short
    digest is missing, blank or a placeholder, or when the LLM returns a blank
    response. On success returns ``backend``, ``chars_written`` and
    ``budget_tokens``.
    """
    # Imported at call time, as in the original (presumably to avoid an
    # import cycle — confirm before hoisting to module level).
    from llm_client import complete

    inara_dir = settings.inara_path()
    short_content = _read(inara_dir / "MEMORY_SHORT.md")

    if _is_unpopulated(short_content):
        return {"error": "MEMORY_SHORT.md is empty — run distill/short first"}

    budget_tokens = settings.memory_budget_mid
    system_prompt = (
        "You are Inara's memory distillation system. "
        "Summarize the following recent session logs into a concise mid-term memory digest. "
        f"Target length: under {budget_tokens} tokens. "
        "Focus on: recurring themes, important decisions made, ongoing projects, "
        "Scott's current state and priorities, and anything that should persist into future sessions. "
        "Write in first person as Inara (e.g. 'Scott and I worked on...'). "
        "Use markdown headings. Be specific and concrete — no filler."
    )

    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": short_content}],
    )

    if not response_text or not response_text.strip():
        # Keep the previous digest rather than replacing it with a bare header.
        logger.warning(
            "distill_mid: empty response via %s; MEMORY_MID.md left unchanged", backend
        )
        return {
            "error": "LLM returned an empty response — MEMORY_MID.md left unchanged",
            "backend": backend,
        }

    now = _timestamp()
    header = (
        f"# MEMORY_MID.md — Mid-Term Memory Digest\n\n"
        f"*Auto-distilled: {now} via {backend}.*\n\n---\n\n"
    )
    digest = header + response_text
    out_path = inara_dir / "MEMORY_MID.md"
    out_path.write_text(digest)
    logger.info("distill_mid: wrote %d chars via %s", len(digest), backend)

    return {
        "backend": backend,
        "chars_written": len(digest),
        "budget_tokens": budget_tokens,
    }


async def distill_long() -> dict:
    """
    Ask the LLM to integrate MEMORY_MID.md into MEMORY_LONG.md.

    The current long-term memory is read from MEMORY_LONG.md, falling back to
    the legacy MEMORY.md when MEMORY_LONG.md does not exist yet, so the first
    distillation after migration builds on the existing memory instead of
    starting from nothing. Output is always written to MEMORY_LONG.md.

    Returns ``{"error": ...}`` without touching MEMORY_LONG.md when the
    mid-term digest is missing, blank or a placeholder, or when the LLM
    returns a blank response. On success returns ``backend``,
    ``chars_written`` and ``budget_tokens``.
    """
    # Imported at call time, as in the original (presumably to avoid an
    # import cycle — confirm before hoisting to module level).
    from llm_client import complete

    inara_dir = settings.inara_path()
    out_path = inara_dir / "MEMORY_LONG.md"
    # Same fallback rule the context loader applies when reading long-term memory.
    current_path = out_path if out_path.exists() else inara_dir / "MEMORY.md"
    long_content = _read(current_path)
    mid_content = _read(inara_dir / "MEMORY_MID.md")

    if _is_unpopulated(mid_content):
        return {"error": "MEMORY_MID.md is empty — run distill/mid first"}

    budget_tokens = settings.memory_budget_long
    system_prompt = (
        "You are Inara's long-term memory curator. "
        "You will receive the current long-term memory and a recent mid-term digest. "
        f"Integrate the new information into the long-term memory. Target: under {budget_tokens} tokens. "
        "Rules: preserve important historical facts; update or replace stale information; "
        "absorb recurring themes from the mid-term digest; remove things no longer relevant. "
        "Return ONLY the updated MEMORY_LONG.md content in markdown. No preamble or commentary."
    )

    user_content = (
        f"## Current MEMORY_LONG.md\n\n{long_content}\n\n"
        f"## Recent MEMORY_MID.md to integrate\n\n{mid_content}"
    )

    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": user_content}],
    )

    if not response_text or not response_text.strip():
        # A blank reply must never wipe the accumulated long-term memory.
        logger.warning(
            "distill_long: empty response via %s; MEMORY_LONG.md left unchanged", backend
        )
        return {
            "error": "LLM returned an empty response — MEMORY_LONG.md left unchanged",
            "backend": backend,
        }

    # Ensure the file has the right header if the LLM dropped it
    if not response_text.lstrip().startswith("# MEMORY_LONG"):
        now = _timestamp()
        response_text = (
            f"# MEMORY_LONG.md — Inara Long-Term Memory\n\n"
            f"*Last distilled: {now} via {backend}.*\n\n---\n\n"
            + response_text
        )

    out_path.write_text(response_text)
    logger.info("distill_long: wrote %d chars via %s", len(response_text), backend)

    return {
        "backend": backend,
        "chars_written": len(response_text),
        "budget_tokens": budget_tokens,
    }
"""
Manual memory distillation endpoints.

  POST /distill/short  — roll session logs → MEMORY_SHORT.md  (no LLM)
  POST /distill/mid    — summarize short   → MEMORY_MID.md    (LLM)
  POST /distill/long   — integrate mid     → MEMORY_LONG.md   (LLM)
  POST /distill/all    — run all three in sequence
"""
from fastapi import APIRouter
from memory_distiller import distill_short, distill_mid, distill_long

router = APIRouter(prefix="/distill")


def _with_ok(result: dict) -> dict:
    """Prefix a distiller result with ``ok``: False when it carries an ``error`` key."""
    return {"ok": "error" not in result, **result}


@router.post("/short")
async def do_distill_short() -> dict:
    """Rebuild MEMORY_SHORT.md from the session logs; always reports ``ok: True``."""
    return {"ok": True, **distill_short()}


@router.post("/mid")
async def do_distill_mid() -> dict:
    """Run the mid-term distillation and report whether it succeeded."""
    return _with_ok(await distill_mid())


@router.post("/long")
async def do_distill_long() -> dict:
    """Run the long-term distillation and report whether it succeeded."""
    return _with_ok(await distill_long())


@router.post("/all")
async def do_distill_all() -> dict:
    """
    Run short → mid → long in sequence, stopping at the first stage that fails.

    The response holds one entry per stage that ran (``short``, ``mid``,
    ``long``) plus an overall ``ok`` flag. When the short stage aggregated no
    session files there is nothing to summarise, so the LLM stages are skipped
    rather than overwriting mid- and long-term memory from an empty digest.
    """
    short_result = distill_short()
    if not short_result.get("files_included"):
        return {
            "ok": False,
            "short": short_result,
            "mid": {"error": "no session logs found — nothing to distill"},
        }

    mid_result = await distill_mid()
    if "error" in mid_result:
        return {"ok": False, "short": short_result, "mid": mid_result}

    long_result = await distill_long()
    return {
        "ok": "error" not in long_result,
        "short": short_result,
        "mid": mid_result,
        "long": long_result,
    }
/* ── File editor modal ───────────────────────────────────── */

/* Full-viewport overlay; hidden until the .open class is applied. */
#file-modal {
  display: none;
  position: fixed;
  inset: 0;
  background: rgba(0,0,0,0.7);
  z-index: 200;
  align-items: center;
  justify-content: center;
}

#file-modal.open { display: flex; }

/* Dialog panel: column layout with a fixed header and a flexible body. */
#file-modal-inner {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 10px;
  width: min(860px, 96vw);
  height: min(82vh, 800px);
  display: flex;
  flex-direction: column;
  overflow: hidden;
}

#file-modal-header {
  display: flex;
  align-items: center;
  gap: 8px;
  padding: 10px 14px;
  border-bottom: 1px solid var(--border);
  background: var(--bg);
  flex-shrink: 0;
}

#file-modal-header select {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 5px;
  color: var(--text);
  font-size: 0.85rem;
  padding: 4px 8px;
  cursor: pointer;
}

#file-modal-title {
  font-size: 0.9rem;
  font-weight: 600;
  color: var(--accent);
  flex: 1;
}

/* Header buttons and their hover / active / save variants. */
.fm-btn {
  background: var(--bg);
  border: 1px solid var(--border);
  border-radius: 5px;
  color: var(--muted);
  font-size: 0.75rem;
  padding: 4px 10px;
  cursor: pointer;
  transition: color 0.15s, border-color 0.15s;
}

.fm-btn:hover {
  color: var(--text);
  border-color: var(--muted);
}

.fm-btn.active {
  color: var(--accent);
  border-color: var(--accent);
}

.fm-btn.save {
  color: var(--accent);
  border-color: var(--inara-border);
}

.fm-btn.save:hover { background: var(--inara-bg); }

/* Message that fades in when the .show class is applied. */
#file-saved-msg {
  font-size: 0.75rem;
  color: #6abf6a;
  opacity: 0;
  transition: opacity 0.3s;
}

#file-saved-msg.show { opacity: 1; }

#file-modal-body {
  flex: 1;
  overflow: hidden;
  display: flex;
  flex-direction: column;
}

/* Editor fills the modal body; it is shown by default. */
#file-editor {
  flex: 1;
  width: 100%;
  background: var(--bg);
  color: var(--text);
  border: none;
  outline: none;
  padding: 16px;
  font-family: 'Courier New', monospace;
  font-size: 0.85rem;
  line-height: 1.55;
  resize: none;
  display: block;
}

/* Preview pane is hidden until the .active class is applied. */
#file-preview {
  flex: 1;
  overflow-y: auto;
  padding: 16px 20px;
  display: none;
  line-height: 1.6;
}

#file-preview.active { display: block; }

#file-editor.hidden { display: none; }

/* Talk activity badge on Sessions button */
#sessions-btn.talk-badge::after {
  content: '●';
  color: #7cb9e8;
  margin-left: 5px;
  font-size: 0.55rem;
  vertical-align: middle;
}

/* ── Context bar ─────────────────────────────────────────── */

#context-bar {
  display: flex;
  align-items: center;
  gap: 6px;
  padding: 4px 20px;
  background: var(--surface);
  border-top: 1px solid var(--border);
  flex-wrap: wrap;
}

.ctx-label {
  font-size: 0.63rem;
  color: var(--muted);
  flex-shrink: 0;
}

.ctx-btn {
  background: var(--bg);
  border: 1px solid var(--border);
  border-radius: 4px;
  color: var(--muted);
  font-size: 0.63rem;
  padding: 2px 7px;
  cursor: pointer;
  transition: color 0.15s, border-color 0.15s, background 0.15s;
}

/* Rule order is kept: these three share specificity, so later rules win. */
.ctx-btn:hover {
  color: var(--text);
  border-color: var(--muted);
}

.ctx-btn.active {
  color: var(--accent);
  border-color: var(--accent);
}

.ctx-btn.mem-on {
  color: #6abf6a;
  border-color: #2a4a2a;
}

/* Flexible spacer between items of the bar. */
.ctx-sep {
  flex: 1;
  min-width: 8px;
}

/* Status text fades in with .show; .err switches it to the error colour. */
#ctx-distill-status {
  font-size: 0.62rem;
  color: #6abf6a;
  opacity: 0;
  transition: opacity 0.3s;
  white-space: nowrap;
}

#ctx-distill-status.show { opacity: 1; }

#ctx-distill-status.err { color: var(--error-text); }
@@ -532,14 +690,55 @@