diff --git a/cortex/memory_distiller.py b/cortex/memory_distiller.py
index 8808b30..5689fcc 100644
--- a/cortex/memory_distiller.py
+++ b/cortex/memory_distiller.py
@@ -1,9 +1,17 @@
 """
-Inara tiered memory distillation.
+Tiered memory distillation.
 
   distill_short()  — roll recent session logs → MEMORY_SHORT.md  (no LLM)
   distill_mid()    — summarize MEMORY_SHORT   → MEMORY_MID.md    (LLM)
   distill_long()   — integrate MEMORY_MID     → MEMORY_LONG.md   (LLM)
+
+Before any file is overwritten, two rolling backups are kept:
+  MEMORY_*.bak1.md — most recent backup  (created just before last write)
+  MEMORY_*.bak2.md — backup before that
+
+LLM responses are sanity-checked before writing. If the response looks like
+a refusal, is too short, or is implausibly smaller or larger than the file it
+would replace, the distill is aborted and the original file is left untouched.
 """
 import logging
 from datetime import datetime
@@ -16,6 +24,25 @@ logger = logging.getLogger(__name__)
 # Rough chars-per-token estimate for budget enforcement
 _CHARS_PER_TOKEN = 4
 
+# Lowercase prefixes: a response whose lowercased first line starts with one is treated as a refusal
+_REFUSAL_PREFIXES = (
+    "i'm sorry",
+    "i am sorry",
+    "i can't",
+    "i cannot",
+    "i'm unable",
+    "i am unable",
+    "as an ai",
+    "as a language model",
+    "i don't have access",
+    "i do not have access",
+    "i'm not able",
+    "i am not able",
+)
+
+# Minimum length in characters (after stripping whitespace) for a valid mid/long distill response
+_MIN_RESPONSE_CHARS = 80
+
 
 def _budget_chars(tokens: int) -> int:
     return tokens * _CHARS_PER_TOKEN
@@ -25,7 +52,62 @@ def _read(path: Path) -> str:
     return path.read_text() if path.exists() else ""
 
 
-def distill_short(username: str | None = None, persona: str | None = None) -> dict:
+def _rotate_backup(path: Path, n: int = 2) -> None:
+    """Rotate up to n rolling backups of path before a write; no-op if path is missing.
+
+    MEMORY_LONG.md → MEMORY_LONG.bak1.md (most recent), MEMORY_LONG.bak2.md (older)
+    Copies are byte-for-byte, so a backup never fails on (or alters) the file's encoding.
+    """
+    if not path.exists():
+        return
+    # Walk from the oldest slot down so bak(i-1) is copied into bak(i) before it is replaced
+    for i in range(n, 1, -1):
+        older = path.parent / f"{path.stem}.bak{i}.md"
+        newer = path.parent / f"{path.stem}.bak{i - 1}.md"
+        if newer.exists():
+            older.write_bytes(newer.read_bytes())
+    # Current file → bak1
+    (path.parent / f"{path.stem}.bak1.md").write_bytes(path.read_bytes())
+
+
+def _sanity_check(response_text: str, context: str, existing_content: str = "") -> str | None:
+    """Return an error string if the LLM response looks invalid, else None.
+
+    Checks (context is the label that prefixes the returned error message):
+    - Minimum absolute length
+    - Refusal / AI preamble phrases on the first line (straight or typographic apostrophes)
+    - Size shrinkage: new content must be at least 40% of the old (catches truncation)
+    - Size explosion: new content must not exceed 250% of the old (catches runaway output)
+      (Both bounds only apply when an existing file is present and reasonably sized.)
+    """
+    stripped = response_text.strip()
+    if len(stripped) < _MIN_RESPONSE_CHARS:
+        return f"{context}: response too short ({len(stripped)} chars) — not writing"
+
+    first_line = stripped.splitlines()[0].lower().replace("\u2019", "'")   # fold typographic apostrophe (I’m → I'm)
+    if first_line.startswith(_REFUSAL_PREFIXES):
+        return f"{context}: response looks like a refusal — not writing"
+
+    if existing_content:
+        old_len = len(existing_content.strip())
+        new_len = len(stripped)
+        if old_len >= _MIN_RESPONSE_CHARS * 4:   # only compare when old file has real content
+            ratio = new_len / old_len
+            if ratio < 0.40:
+                return (
+                    f"{context}: new content is only {ratio:.0%} of the old "
+                    f"({new_len} vs {old_len} chars) — looks truncated, not writing"
+                )
+            if ratio > 2.50:
+                return (
+                    f"{context}: new content is {ratio:.0%} of the old "
+                    f"({new_len} vs {old_len} chars) — looks like runaway output, not writing"
+                )
+
+    return None
+
+
+def distill_short(username: str, persona: str) -> dict:
     """
     Roll the most recent session log files into MEMORY_SHORT.md.
     No LLM involved — pure aggregation with budget truncation.
@@ -64,8 +146,9 @@ def distill_short(username: str | None = None, persona: str | None = None) -> di
     )
 
     out_path = inara_dir / "MEMORY_SHORT.md"
+    _rotate_backup(out_path)
     out_path.write_text(header + body)
-    logger.info("distill_short: wrote %d chars from %d files", len(header) + len(body), len(parts))
+    logger.info("distill_short [%s/%s]: wrote %d chars from %d files", username, persona, len(header) + len(body), len(parts))
 
     return {
         "files_included": len(parts),
@@ -77,7 +160,7 @@ def distill_short(username: str | None = None, persona: str | None = None) -> di
 async def distill_mid(username: str, persona: str) -> dict:
     """
     Ask the LLM to summarize MEMORY_SHORT.md → MEMORY_MID.md.
-    Uses DISTILL_BACKEND_MID if set (e.g. "local"), otherwise primary_backend.
+    Backs up the current MEMORY_MID.md before overwriting.
     """
     from llm_client import complete
     from persona import set_context
@@ -87,6 +170,7 @@ async def distill_mid(username: str, persona: str) -> dict:
 
     inara_dir = _persona_path(u, p)
     short_content = _read(inara_dir / "MEMORY_SHORT.md")
+    existing_mid = _read(inara_dir / "MEMORY_MID.md")
 
     if not short_content.strip() or "Not yet populated" in short_content:
         return {"error": "MEMORY_SHORT.md is empty — run distill/short first"}
@@ -110,14 +194,20 @@ async def distill_mid(username: str, persona: str) -> dict:
         role="distill",
     )
 
+    err = _sanity_check(response_text, "distill_mid", existing_mid)
+    if err:
+        logger.warning(err)
+        return {"error": err}
+
     now = datetime.now().strftime("%Y-%m-%d %H:%M")
     header = (
         f"# MEMORY_MID.md — Mid-Term Memory Digest\n\n"
         f"*Auto-distilled: {now} via {backend}.*\n\n---\n\n"
     )
     out_path = inara_dir / "MEMORY_MID.md"
+    _rotate_backup(out_path)
     out_path.write_text(header + response_text)
-    logger.info("distill_mid: wrote %d chars via %s", len(header) + len(response_text), backend)
+    logger.info("distill_mid [%s/%s]: wrote %d chars via %s", u, p, len(header) + len(response_text), backend)
 
     return {
         "username": u,
@@ -130,7 +220,7 @@ async def distill_mid(username: str, persona: str) -> dict:
 async def distill_long(username: str, persona: str) -> dict:
     """
     Ask the LLM to integrate MEMORY_MID.md into MEMORY_LONG.md.
-    Uses DISTILL_BACKEND_LONG if set, otherwise primary_backend.
+    Backs up the current MEMORY_LONG.md before overwriting.
     """
     from llm_client import complete
     from persona import set_context
@@ -167,6 +257,11 @@ async def distill_long(username: str, persona: str) -> dict:
         role="distill",
     )
 
+    err = _sanity_check(response_text, "distill_long", long_content)
+    if err:
+        logger.warning(err)
+        return {"error": err}
+
     # Ensure the file has the right header if the LLM dropped it
     now = datetime.now().strftime("%Y-%m-%d %H:%M")
     if not response_text.lstrip().startswith("# MEMORY_LONG"):
@@ -177,8 +272,9 @@ async def distill_long(username: str, persona: str) -> dict:
         )
 
     out_path = inara_dir / "MEMORY_LONG.md"
+    _rotate_backup(out_path)
     out_path.write_text(response_text)
-    logger.info("distill_long: wrote %d chars via %s", len(response_text), backend)
+    logger.info("distill_long [%s/%s]: wrote %d chars via %s", u, p, len(response_text), backend)
 
     return {
         "username": u,
diff --git a/cortex/routers/files.py b/cortex/routers/files.py
index 4c24eb5..d37d091 100644
--- a/cortex/routers/files.py
+++ b/cortex/routers/files.py
@@ -16,10 +16,16 @@ ALLOWED = {
     "USER.md",
     "PROTOCOLS.md",
     "CONTEXT_TIERS.md",
-    "MEMORY.md",        # legacy — kept for reference
+    "MEMORY.md",          # legacy — kept for reference
     "MEMORY_LONG.md",
     "MEMORY_MID.md",
     "MEMORY_SHORT.md",
+    "MEMORY_LONG.bak1.md",
+    "MEMORY_LONG.bak2.md",
+    "MEMORY_MID.bak1.md",
+    "MEMORY_MID.bak2.md",
+    "MEMORY_SHORT.bak1.md",
+    "MEMORY_SHORT.bak2.md",
     "HELP.md",
 }