""" Inara tiered memory distillation. distill_short() — roll recent session logs → MEMORY_SHORT.md (no LLM) distill_mid() — summarize MEMORY_SHORT → MEMORY_MID.md (LLM) distill_long() — integrate MEMORY_MID → MEMORY_LONG.md (LLM) """ import logging from datetime import datetime from pathlib import Path from config import settings from persona import persona_path as _persona_path logger = logging.getLogger(__name__) # Rough chars-per-token estimate for budget enforcement _CHARS_PER_TOKEN = 4 def _budget_chars(tokens: int) -> int: return tokens * _CHARS_PER_TOKEN def _read(path: Path) -> str: return path.read_text() if path.exists() else "" def distill_short(username: str | None = None, persona: str | None = None) -> dict: """ Roll the most recent session log files into MEMORY_SHORT.md. No LLM involved — pure aggregation with budget truncation. Files are included newest-first until the budget is reached, then written in chronological order (oldest first). """ inara_dir = _persona_path(username, persona) sessions_dir = inara_dir / "sessions" budget = _budget_chars(settings.memory_budget_short) session_files = ( sorted(sessions_dir.glob("*.md"), reverse=True) if sessions_dir.exists() else [] ) parts = [] total_chars = 0 for sf in session_files: content = sf.read_text() if total_chars + len(content) > budget and parts: break # always include at least one file parts.append((sf.name, content)) total_chars += len(content) if total_chars >= budget: break now = datetime.now().strftime("%Y-%m-%d %H:%M") header = ( f"# MEMORY_SHORT.md — Recent Session Digest\n\n" f"*Auto-generated: {now}. {len(parts)} session file(s).*\n\n---\n\n" ) # Write in chronological order (oldest first) body = "\n\n".join( f"--- {name} ---\n{content}" for name, content in reversed(parts) ) out_path = inara_dir / "MEMORY_SHORT.md" out_path.write_text(header + body) logger.info("distill_short: wrote %d chars from %d files", len(header) + len(body), len(parts)) return { "files_included": len(parts), "chars_written": len(header) + len(body), "budget_chars": budget, } async def distill_mid(username: str | None = None, persona: str | None = None) -> dict: """ Ask the LLM to summarize MEMORY_SHORT.md → MEMORY_MID.md. Uses DISTILL_BACKEND_MID if set (e.g. "local"), otherwise primary_backend. """ from llm_client import complete from persona import set_context u = username or settings.user_name.lower() p = persona or settings.agent_name.lower() set_context(u, p) inara_dir = _persona_path(u, p) short_content = _read(inara_dir / "MEMORY_SHORT.md") if not short_content.strip() or "Not yet populated" in short_content: return {"error": "MEMORY_SHORT.md is empty — run distill/short first"} backend_override = settings.distill_backend_mid or None budget_tokens = settings.memory_budget_mid system_prompt = ( f"You are {settings.agent_name}'s memory distillation system. " "Summarize the following recent session logs into a concise mid-term memory digest. " f"Target length: under {budget_tokens} tokens. " "Focus on: recurring themes, important decisions made, ongoing projects, " f"{settings.user_name}'s current state and priorities, and anything that should persist into future sessions. " f"Write in first person as {settings.agent_name} (e.g. '{settings.user_name} and I worked on...'). " "Use markdown headings. Be specific and concrete — no filler." ) response_text, backend = await complete( system_prompt=system_prompt, messages=[{"role": "user", "content": short_content}], model=backend_override, ) now = datetime.now().strftime("%Y-%m-%d %H:%M") header = ( f"# MEMORY_MID.md — Mid-Term Memory Digest\n\n" f"*Auto-distilled: {now} via {backend}.*\n\n---\n\n" ) out_path = inara_dir / "MEMORY_MID.md" out_path.write_text(header + response_text) logger.info("distill_mid: wrote %d chars via %s", len(header) + len(response_text), backend) return { "username": u, "backend": backend, "chars_written": len(header) + len(response_text), "budget_tokens": budget_tokens, } async def distill_long(username: str | None = None, persona: str | None = None) -> dict: """ Ask the LLM to integrate MEMORY_MID.md into MEMORY_LONG.md. Uses DISTILL_BACKEND_LONG if set, otherwise primary_backend. """ from llm_client import complete from persona import set_context u = username or settings.user_name.lower() p = persona or settings.agent_name.lower() set_context(u, p) inara_dir = _persona_path(u, p) long_content = _read(inara_dir / "MEMORY_LONG.md") mid_content = _read(inara_dir / "MEMORY_MID.md") if not mid_content.strip() or "Not yet populated" in mid_content: return {"error": "MEMORY_MID.md is empty — run distill/mid first"} backend_override = settings.distill_backend_long or None budget_tokens = settings.memory_budget_long system_prompt = ( f"You are {settings.agent_name}'s long-term memory curator. " "You will receive the current long-term memory and a recent mid-term digest. " f"Integrate the new information into the long-term memory. Target: under {budget_tokens} tokens. " "Rules: preserve important historical facts; update or replace stale information; " "absorb recurring themes from the mid-term digest; remove things no longer relevant. " "Return ONLY the updated MEMORY_LONG.md content in markdown. No preamble or commentary." ) user_content = ( f"## Current MEMORY_LONG.md\n\n{long_content}\n\n" f"## Recent MEMORY_MID.md to integrate\n\n{mid_content}" ) response_text, backend = await complete( system_prompt=system_prompt, messages=[{"role": "user", "content": user_content}], model=backend_override, ) # Ensure the file has the right header if the LLM dropped it now = datetime.now().strftime("%Y-%m-%d %H:%M") if not response_text.lstrip().startswith("# MEMORY_LONG"): response_text = ( f"# MEMORY_LONG.md — {settings.agent_name} Long-Term Memory\n\n" f"*Last distilled: {now} via {backend}.*\n\n---\n\n" + response_text ) out_path = inara_dir / "MEMORY_LONG.md" out_path.write_text(response_text) logger.info("distill_long: wrote %d chars via %s", len(response_text), backend) return { "username": u, "backend": backend, "chars_written": len(response_text), "budget_tokens": budget_tokens, }