diff --git a/cortex/memory_distiller.py b/cortex/memory_distiller.py index 8808b30..5689fcc 100644 --- a/cortex/memory_distiller.py +++ b/cortex/memory_distiller.py @@ -1,9 +1,17 @@ """ -Inara tiered memory distillation. +Tiered memory distillation. distill_short() — roll recent session logs → MEMORY_SHORT.md (no LLM) distill_mid() — summarize MEMORY_SHORT → MEMORY_MID.md (LLM) distill_long() — integrate MEMORY_MID → MEMORY_LONG.md (LLM) + +Before any file is overwritten, two rolling backups are kept: + MEMORY_*.bak1.md — most recent backup (created just before last write) + MEMORY_*.bak2.md — backup before that + +LLM responses are sanity-checked before writing. If the response looks like +a refusal, is too short, or is implausibly small or large relative to the existing file, the distill is +aborted and the original file is left untouched. """ import logging from datetime import datetime @@ -16,6 +24,25 @@ logger = logging.getLogger(__name__) # Rough chars-per-token estimate for budget enforcement _CHARS_PER_TOKEN = 4 +# Phrases that indicate the LLM refused or misunderstood the task +_REFUSAL_PREFIXES = ( + "i'm sorry", + "i am sorry", + "i can't", + "i cannot", + "i'm unable", + "i am unable", + "as an ai", + "as a language model", + "i don't have access", + "i do not have access", + "i'm not able", + "i am not able", +) + +# Minimum characters for a valid mid/long distill response +_MIN_RESPONSE_CHARS = 80 + def _budget_chars(tokens: int) -> int: return tokens * _CHARS_PER_TOKEN @@ -25,7 +52,62 @@ def _read(path: Path) -> str: return path.read_text() if path.exists() else "" -def distill_short(username: str | None = None, persona: str | None = None) -> dict: +def _rotate_backup(path: Path, n: int = 2) -> None: + """Rotate up to n rolling backups of path before a write. 
+ + MEMORY_LONG.md → MEMORY_LONG.bak1.md (most recent), MEMORY_LONG.bak2.md (older) + """ + if not path.exists(): + return + # Shift older backups down, oldest first: bak(n-1) → bak(n), …, bak1 → bak2 + for i in range(n, 1, -1): + older = path.parent / f"{path.stem}.bak{i}.md" + newer = path.parent / f"{path.stem}.bak{i - 1}.md" + if newer.exists(): + older.write_text(newer.read_text()) + # Current file → bak1 + bak1 = path.parent / f"{path.stem}.bak1.md" + bak1.write_text(path.read_text()) + + +def _sanity_check(response_text: str, context: str, existing_content: str = "") -> str | None: + """Return an error string if the LLM response looks invalid, else None. + + Checks: + - Minimum absolute length + - Refusal / AI preamble phrases + - Size shrinkage: new content must be at least 40% of the old (catches truncation) + - Size explosion: new content must not exceed 250% of the old (catches runaway output) + (Both bounds only apply when an existing file is present and reasonably sized.) + """ + stripped = response_text.strip() + if len(stripped) < _MIN_RESPONSE_CHARS: + return f"{context}: response too short ({len(stripped)} chars) — not writing" + + first_line = stripped.lower().splitlines()[0] + if any(first_line.startswith(p) for p in _REFUSAL_PREFIXES): + return f"{context}: response looks like a refusal — not writing" + + if existing_content: + old_len = len(existing_content.strip()) + new_len = len(stripped) + if old_len >= _MIN_RESPONSE_CHARS * 4: # only compare when old file has real content + ratio = new_len / old_len + if ratio < 0.40: + return ( + f"{context}: new content is only {ratio:.0%} of the old " + f"({new_len} vs {old_len} chars) — looks truncated, not writing" + ) + if ratio > 2.50: + return ( + f"{context}: new content is {ratio:.0%} of the old " + f"({new_len} vs {old_len} chars) — looks like runaway output, not writing" + ) + + return None + + +def distill_short(username: str, persona: str) -> dict: """ Roll the most recent session log files 
into MEMORY_SHORT.md. No LLM involved — pure aggregation with budget truncation. @@ -64,8 +146,9 @@ def distill_short(username: str | None = None, persona: str | None = None) -> di ) out_path = inara_dir / "MEMORY_SHORT.md" + _rotate_backup(out_path) out_path.write_text(header + body) - logger.info("distill_short: wrote %d chars from %d files", len(header) + len(body), len(parts)) + logger.info("distill_short [%s/%s]: wrote %d chars from %d files", username, persona, len(header) + len(body), len(parts)) return { "files_included": len(parts), @@ -77,7 +160,7 @@ def distill_short(username: str | None = None, persona: str | None = None) -> di async def distill_mid(username: str, persona: str) -> dict: """ Ask the LLM to summarize MEMORY_SHORT.md → MEMORY_MID.md. - Uses DISTILL_BACKEND_MID if set (e.g. "local"), otherwise primary_backend. + Backs up the current MEMORY_MID.md before overwriting. """ from llm_client import complete from persona import set_context @@ -87,6 +170,7 @@ async def distill_mid(username: str, persona: str) -> dict: inara_dir = _persona_path(u, p) short_content = _read(inara_dir / "MEMORY_SHORT.md") + existing_mid = _read(inara_dir / "MEMORY_MID.md") if not short_content.strip() or "Not yet populated" in short_content: return {"error": "MEMORY_SHORT.md is empty — run distill/short first"} @@ -110,14 +194,20 @@ async def distill_mid(username: str, persona: str) -> dict: role="distill", ) + err = _sanity_check(response_text, "distill_mid", existing_mid) + if err: + logger.warning(err) + return {"error": err} + now = datetime.now().strftime("%Y-%m-%d %H:%M") header = ( f"# MEMORY_MID.md — Mid-Term Memory Digest\n\n" f"*Auto-distilled: {now} via {backend}.*\n\n---\n\n" ) out_path = inara_dir / "MEMORY_MID.md" + _rotate_backup(out_path) out_path.write_text(header + response_text) - logger.info("distill_mid: wrote %d chars via %s", len(header) + len(response_text), backend) + logger.info("distill_mid [%s/%s]: wrote %d chars via %s", u, p, len(header) + 
len(response_text), backend) return { "username": u, @@ -130,7 +220,7 @@ async def distill_mid(username: str, persona: str) -> dict: async def distill_long(username: str, persona: str) -> dict: """ Ask the LLM to integrate MEMORY_MID.md into MEMORY_LONG.md. - Uses DISTILL_BACKEND_LONG if set, otherwise primary_backend. + Backs up the current MEMORY_LONG.md before overwriting. """ from llm_client import complete from persona import set_context @@ -167,6 +257,11 @@ async def distill_long(username: str, persona: str) -> dict: role="distill", ) + err = _sanity_check(response_text, "distill_long", long_content) + if err: + logger.warning(err) + return {"error": err} + # Ensure the file has the right header if the LLM dropped it now = datetime.now().strftime("%Y-%m-%d %H:%M") if not response_text.lstrip().startswith("# MEMORY_LONG"): @@ -177,8 +272,9 @@ async def distill_long(username: str, persona: str) -> dict: ) out_path = inara_dir / "MEMORY_LONG.md" + _rotate_backup(out_path) out_path.write_text(response_text) - logger.info("distill_long: wrote %d chars via %s", len(response_text), backend) + logger.info("distill_long [%s/%s]: wrote %d chars via %s", u, p, len(response_text), backend) return { "username": u, diff --git a/cortex/routers/files.py b/cortex/routers/files.py index 4c24eb5..d37d091 100644 --- a/cortex/routers/files.py +++ b/cortex/routers/files.py @@ -16,10 +16,16 @@ ALLOWED = { "USER.md", "PROTOCOLS.md", "CONTEXT_TIERS.md", - "MEMORY.md", # legacy — kept for reference + "MEMORY.md", # legacy — kept for reference "MEMORY_LONG.md", "MEMORY_MID.md", "MEMORY_SHORT.md", + "MEMORY_LONG.bak1.md", + "MEMORY_LONG.bak2.md", + "MEMORY_MID.bak1.md", + "MEMORY_MID.bak2.md", + "MEMORY_SHORT.bak1.md", + "MEMORY_SHORT.bak2.md", "HELP.md", }