Compare commits

...

2 Commits

Author SHA1 Message Date
Scott Idem
ce3c1f5f7f Add tiered memory system with manual distillation
- config.py: memory_budget_long/mid/short settings (overridable in .env)
- memory_distiller.py: distill_short (no LLM), distill_mid, distill_long (LLM)
- routers/distill.py: POST /distill/{short,mid,long,all} endpoints
- context_loader.py: rewrote to load long→mid→short order with include_* toggles
- routers/chat.py: ChatRequest gains include_long/mid/short fields
- routers/files.py: MEMORY_LONG/MID/SHORT.md added to ALLOWED set
- main.py: register distill router
- static/index.html: context bar — tier selector, L/M/S memory toggles,
  distill buttons with status feedback; send includes tier + memory flags
- inara/MEMORY_LONG.md: migrated from MEMORY.md + Cortex/Talk bot notes
- inara/MEMORY_MID.md, MEMORY_SHORT.md: stubs ready for distillation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 21:22:32 -04:00
Scott Idem
3455c7a09c Add SSE real-time Talk activity, file editor UI, and identity file API
- event_bus.py: in-process asyncio pub/sub (one Queue per SSE client)
- nextcloud_talk.py: publishes nct_message/nct_response events to bus
- chat.py: GET /events SSE endpoint streams Talk activity to browser
- routers/files.py: whitelist-protected GET/PUT for Inara identity .md files
- main.py: register files router
- static/index.html: real-time Talk feed, blue badge on Sessions btn,
  Files modal with preview/edit toggle and Ctrl+S save

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-17 21:10:07 -04:00
17 changed files with 1237 additions and 28 deletions

View File

@@ -26,6 +26,12 @@ class Settings(BaseSettings):
nextcloud_talk_bot_secret: str = "" # set in .env
nextcloud_talk_timeout: int = 55
# Memory tier token budgets — soft caps used during distillation
# Override in .env: MEMORY_BUDGET_LONG=4000 etc.
memory_budget_long: int = 2000
memory_budget_mid: int = 2000
memory_budget_short: int = 3000
host: str = "0.0.0.0"
port: int = 8000

View File

@@ -2,46 +2,83 @@ from pathlib import Path
from config import settings
# Files loaded per tier — mirrors CONTEXT_TIERS.md
TIER_FILES: dict[int, list[str]] = {
1: ["SOUL.md", "IDENTITY.md"], # + USER.md summary only
2: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"],
3: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"],
4: ["SOUL.md", "IDENTITY.md", "USER.md", "MEMORY.md", "PROTOCOLS.md"],
}
# Core identity files — always loaded regardless of tier
_CORE = ["SOUL.md", "IDENTITY.md"]
# Lines of USER.md to include at Tier 1 (just identity + what he cares about)
TIER_1_USER_LINES = 30
# Lines of USER.md to include at Tier 1 (identity + what he cares about)
_TIER_1_USER_LINES = 30
def _read(path: Path) -> str:
    """Return the text of *path*, or a ``[missing: <name>]`` marker when it is absent."""
    if not path.exists():
        # The marker carries only the file name, not the full path.
        return f"[missing: {path.name}]"
    return path.read_text()
def load_context(
tier: int = 2,
include_long: bool = True,
include_mid: bool = True,
include_short: bool = True,
) -> str:
"""
Build the system-prompt context block for a given tier and memory toggles.
Load order (long → mid → short) keeps the most recent memory closest
to the conversation turn, which improves LLM recall.
def load_context(tier: int = 2) -> str:
Tier 1 — SOUL + IDENTITY + USER summary (~1,500 tokens)
Tier 2 — + USER full + PROTOCOLS + memory (~5,000 tokens)
Tier 3 — + last 2 raw session logs (~15,000 tokens)
Tier 4 — + last 7 raw session logs (~50,000 tokens)
"""
inara_dir = settings.inara_path()
parts = []
files = TIER_FILES.get(tier, TIER_FILES[2])
for filename in files:
# ── 1. Core identity (always) ──────────────────────────────────
for filename in _CORE:
path = inara_dir / filename
if not path.exists():
continue
if path.exists():
parts.append(f"--- {filename} ---\n{path.read_text()}")
if filename == "USER.md" and tier == 1:
# Tier 1: include only the first N lines
lines = path.read_text().splitlines()[:TIER_1_USER_LINES]
# ── 2. USER.md ─────────────────────────────────────────────────
user_path = inara_dir / "USER.md"
if user_path.exists():
if tier == 1:
lines = user_path.read_text().splitlines()[:_TIER_1_USER_LINES]
content = "\n".join(lines)
else:
content = path.read_text()
content = user_path.read_text()
parts.append(f"--- USER.md ---\n{content}")
parts.append(f"--- {filename} ---\n{content}")
if tier < 2:
return "\n\n".join(parts)
# ── 3. Protocols (tier 2+) ─────────────────────────────────────
proto_path = inara_dir / "PROTOCOLS.md"
if proto_path.exists():
parts.append(f"--- PROTOCOLS.md ---\n{proto_path.read_text()}")
# ── 4. Tiered memory — long → mid → short ─────────────────────
# Short is last so it sits closest to the conversation turn.
if include_long:
# Fall back to legacy MEMORY.md during/after migration
long_path = inara_dir / "MEMORY_LONG.md"
if not long_path.exists():
long_path = inara_dir / "MEMORY.md"
if long_path.exists():
parts.append(f"--- {long_path.name} ---\n{long_path.read_text()}")
if include_mid:
mid_path = inara_dir / "MEMORY_MID.md"
if mid_path.exists() and mid_path.stat().st_size > 100:
content = mid_path.read_text()
if "Not yet populated" not in content:
parts.append(f"--- MEMORY_MID.md ---\n{content}")
if include_short:
short_path = inara_dir / "MEMORY_SHORT.md"
if short_path.exists() and short_path.stat().st_size > 100:
content = short_path.read_text()
if "Not yet populated" not in content:
parts.append(f"--- MEMORY_SHORT.md ---\n{content}")
# ── 5. Raw session logs (tier 3+) ──────────────────────────────
if tier >= 3:
# Add recent session logs
sessions_dir = inara_dir / "sessions"
if sessions_dir.exists():
count = 2 if tier == 3 else 7

33
cortex/event_bus.py Normal file
View File

@@ -0,0 +1,33 @@
"""
Simple in-process pub/sub for server-sent events.
Usage:
# Publisher (e.g. nextcloud_talk router)
await event_bus.publish({"type": "nct_message", ...})
# Consumer (SSE endpoint in chat router)
q = event_bus.subscribe()
try:
event = await asyncio.wait_for(q.get(), timeout=20)
finally:
event_bus.unsubscribe(q)
"""
import asyncio
from typing import Any
# Registry of live subscriber queues — one per consumer that called subscribe().
_subscribers: set[asyncio.Queue] = set()


def subscribe() -> asyncio.Queue:
    """Create a new queue, register it to receive published events, and return it."""
    queue: asyncio.Queue = asyncio.Queue()
    _subscribers.add(queue)
    return queue


def unsubscribe(q: asyncio.Queue) -> None:
    """Stop delivering events to *q*; a queue that is not registered is ignored."""
    _subscribers.discard(q)


async def publish(event: dict[str, Any]) -> None:
    """Put *event* on every currently registered subscriber queue."""
    # Iterate over a snapshot so the registry can change while we await.
    snapshot = tuple(_subscribers)
    for queue in snapshot:
        await queue.put(event)

View File

@@ -8,7 +8,7 @@ import uvicorn
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s: %(message)s")
from config import settings
from routers import chat, google_chat, nextcloud_talk
from routers import chat, google_chat, nextcloud_talk, files, distill
@asynccontextmanager
@@ -23,6 +23,8 @@ app = FastAPI(title="Cortex Dispatcher", lifespan=lifespan)
app.include_router(chat.router)
app.include_router(google_chat.router)
app.include_router(nextcloud_talk.router)
app.include_router(files.router)
app.include_router(distill.router)
app.mount("/static", StaticFiles(directory="static"), name="static")

170
cortex/memory_distiller.py Normal file
View File

@@ -0,0 +1,170 @@
"""
Inara tiered memory distillation.
distill_short() — roll recent session logs → MEMORY_SHORT.md (no LLM)
distill_mid() — summarize MEMORY_SHORT → MEMORY_MID.md (LLM)
distill_long() — integrate MEMORY_MID → MEMORY_LONG.md (LLM)
"""
import logging
from datetime import datetime
from pathlib import Path
from config import settings
logger = logging.getLogger(__name__)
# Rough chars-per-token estimate for budget enforcement
_CHARS_PER_TOKEN = 4


def _budget_chars(tokens: int) -> int:
    """Convert a token budget into an approximate character budget."""
    return _CHARS_PER_TOKEN * tokens
def _read(path: Path) -> str:
    """Return the text of *path*, or an empty string when the file does not exist."""
    if path.exists():
        return path.read_text()
    return ""
def distill_short() -> dict:
    """
    Roll the most recent session log files into MEMORY_SHORT.md.

    No LLM involved — pure aggregation under a soft character budget.
    Files are included newest-first until the budget is reached,
    then written in chronological order (oldest first). The first file is
    always included, even when it alone exceeds the budget.

    When no session files are found, the body is a "Not yet populated"
    placeholder. distill_mid() and the context loader both check for that
    phrase, so an empty digest is neither summarized by the LLM nor loaded
    into the prompt.

    Returns a dict with ``files_included``, ``chars_written`` and
    ``budget_chars``.
    """
    inara_dir = settings.inara_path()
    sessions_dir = inara_dir / "sessions"
    budget = _budget_chars(settings.memory_budget_short)

    # Reverse name order is treated as newest-first.
    # NOTE(review): assumes session file names sort chronologically — confirm.
    session_files = (
        sorted(sessions_dir.glob("*.md"), reverse=True)
        if sessions_dir.exists()
        else []
    )

    parts: list[tuple[str, str]] = []
    total_chars = 0
    for sf in session_files:
        content = sf.read_text()
        if parts and total_chars + len(content) > budget:
            break  # always include at least one file
        parts.append((sf.name, content))
        total_chars += len(content)
        if total_chars >= budget:
            break

    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    header = (
        "# MEMORY_SHORT.md — Recent Session Digest\n\n"
        f"*Auto-generated: {now}. {len(parts)} session file(s).*\n\n---\n\n"
    )
    if parts:
        # Write in chronological order (oldest first)
        body = "\n\n".join(
            f"--- {name} ---\n{content}" for name, content in reversed(parts)
        )
    else:
        # Keep the sentinel phrase the downstream guards look for.
        body = "*Not yet populated — no session logs found.*\n"

    out_path = inara_dir / "MEMORY_SHORT.md"
    out_path.write_text(header + body)

    chars_written = len(header) + len(body)
    logger.info("distill_short: wrote %d chars from %d files", chars_written, len(parts))
    return {
        "files_included": len(parts),
        "chars_written": chars_written,
        "budget_chars": budget,
    }
async def distill_mid() -> dict:
    """
    Ask the LLM to summarize MEMORY_SHORT.md → MEMORY_MID.md.

    Returns a dict with ``backend``, ``chars_written`` and ``budget_tokens``
    on success. Returns a dict with a single ``error`` key when
    MEMORY_SHORT.md is empty or still a stub, or when the LLM returns no
    text. In the error cases MEMORY_MID.md is left untouched.
    """
    from llm_client import complete

    inara_dir = settings.inara_path()
    short_content = _read(inara_dir / "MEMORY_SHORT.md")

    if not short_content.strip() or "Not yet populated" in short_content:
        return {"error": "MEMORY_SHORT.md is empty — run distill/short first"}

    budget_tokens = settings.memory_budget_mid
    system_prompt = (
        "You are Inara's memory distillation system. "
        "Summarize the following recent session logs into a concise mid-term memory digest. "
        f"Target length: under {budget_tokens} tokens. "
        "Focus on: recurring themes, important decisions made, ongoing projects, "
        "Scott's current state and priorities, and anything that should persist into future sessions. "
        "Write in first person as Inara (e.g. 'Scott and I worked on...'). "
        "Use markdown headings. Be specific and concrete — no filler."
    )

    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": short_content}],
    )

    # Never replace the existing digest with a header-only file.
    if not response_text or not response_text.strip():
        logger.warning("distill_mid: empty response via %s; MEMORY_MID.md left unchanged", backend)
        return {"error": "LLM returned an empty response — MEMORY_MID.md left unchanged"}

    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    header = (
        "# MEMORY_MID.md — Mid-Term Memory Digest\n\n"
        f"*Auto-distilled: {now} via {backend}.*\n\n---\n\n"
    )
    out_path = inara_dir / "MEMORY_MID.md"
    out_path.write_text(header + response_text)

    chars_written = len(header) + len(response_text)
    logger.info("distill_mid: wrote %d chars via %s", chars_written, backend)
    return {
        "backend": backend,
        "chars_written": chars_written,
        "budget_tokens": budget_tokens,
    }
async def distill_long() -> dict:
    """
    Ask the LLM to integrate MEMORY_MID.md into MEMORY_LONG.md.

    Returns a dict with ``backend``, ``chars_written`` and ``budget_tokens``
    on success. Returns a dict with a single ``error`` key when MEMORY_MID.md
    is empty or still a stub, or when the LLM returns no text. In the error
    cases the existing MEMORY_LONG.md is left untouched.
    """
    from llm_client import complete

    inara_dir = settings.inara_path()
    long_content = _read(inara_dir / "MEMORY_LONG.md")
    mid_content = _read(inara_dir / "MEMORY_MID.md")

    if not mid_content.strip() or "Not yet populated" in mid_content:
        return {"error": "MEMORY_MID.md is empty — run distill/mid first"}

    budget_tokens = settings.memory_budget_long
    system_prompt = (
        "You are Inara's long-term memory curator. "
        "You will receive the current long-term memory and a recent mid-term digest. "
        f"Integrate the new information into the long-term memory. Target: under {budget_tokens} tokens. "
        "Rules: preserve important historical facts; update or replace stale information; "
        "absorb recurring themes from the mid-term digest; remove things no longer relevant. "
        "Return ONLY the updated MEMORY_LONG.md content in markdown. No preamble or commentary."
    )
    user_content = (
        f"## Current MEMORY_LONG.md\n\n{long_content}\n\n"
        f"## Recent MEMORY_MID.md to integrate\n\n{mid_content}"
    )

    response_text, backend = await complete(
        system_prompt=system_prompt,
        messages=[{"role": "user", "content": user_content}],
    )

    # The write below replaces the whole file, so an empty reply would erase
    # the curated long-term memory. Refuse to write in that case.
    if not response_text or not response_text.strip():
        logger.warning("distill_long: empty response via %s; MEMORY_LONG.md left unchanged", backend)
        return {"error": "LLM returned an empty response — MEMORY_LONG.md left unchanged"}

    # Ensure the file has the right header if the LLM dropped it
    now = datetime.now().strftime("%Y-%m-%d %H:%M")
    if not response_text.lstrip().startswith("# MEMORY_LONG"):
        response_text = (
            "# MEMORY_LONG.md — Inara Long-Term Memory\n\n"
            f"*Last distilled: {now} via {backend}.*\n\n---\n\n"
            + response_text
        )

    out_path = inara_dir / "MEMORY_LONG.md"
    out_path.write_text(response_text)

    logger.info("distill_long: wrote %d chars via %s", len(response_text), backend)
    return {
        "backend": backend,
        "chars_written": len(response_text),
        "budget_tokens": budget_tokens,
    }

View File

@@ -8,6 +8,7 @@ from llm_client import complete
from session_logger import log_turn
from session_store import load as load_session, save as save_session, list_all, generate_session_id
from config import settings
import event_bus
router = APIRouter()
@@ -18,6 +19,9 @@ class ChatRequest(BaseModel):
session_id: str | None = None
tier: int | None = None
model: str | None = None # "claude" or "gemini" to override; None = use primary_backend
include_long: bool = True
include_mid: bool = True
include_short: bool = True
class BackendRequest(BaseModel):
@@ -48,7 +52,12 @@ async def _stream_chat(req: ChatRequest):
session_id = req.session_id or generate_session_id()
tier = req.tier or settings.default_tier
system_prompt = load_context(tier)
system_prompt = load_context(
tier,
include_long=req.include_long,
include_mid=req.include_mid,
include_short=req.include_short,
)
history = load_session(session_id)
history.append({"role": "user", "content": req.message})
@@ -141,6 +150,30 @@ async def replace_history(session_id: str, req: HistoryUpdate) -> dict:
return {"ok": True, "session_id": session_id}
@router.get("/events")
async def sse_events() -> StreamingResponse:
    """Server-sent events stream — pushes real-time Talk activity to the browser."""

    async def stream():
        # One queue per connected client; it is always unregistered on exit.
        q = event_bus.subscribe()
        try:
            while True:
                try:
                    event = await asyncio.wait_for(q.get(), timeout=20)
                except asyncio.TimeoutError:
                    # Nothing published for 20 s — send a keepalive frame.
                    yield 'data: {"type":"keepalive"}\n\n'
                else:
                    yield f"data: {json.dumps(event)}\n\n"
        finally:
            # GeneratorExit and CancelledError are deliberately not caught:
            # cancellation should propagate, and this cleanup runs either way.
            event_bus.unsubscribe(q)

    return StreamingResponse(
        stream(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
@router.post("/note")
async def add_note(req: NoteRequest) -> dict:
"""Inject a public note into session history so the LLM sees it next turn."""

44
cortex/routers/distill.py Normal file
View File

@@ -0,0 +1,44 @@
"""
Manual memory distillation endpoints.
POST /distill/short — roll session logs → MEMORY_SHORT.md (no LLM)
POST /distill/mid — summarize short → MEMORY_MID.md (LLM)
POST /distill/long — integrate mid → MEMORY_LONG.md (LLM)
POST /distill/all — run all three in sequence
"""
from fastapi import APIRouter
from memory_distiller import distill_short, distill_mid, distill_long
router = APIRouter(prefix="/distill")
@router.post("/short")
async def do_distill_short() -> dict:
    """Run distill_short() and return its result dict with ``ok`` set to True."""
    outcome = distill_short()
    return {"ok": True, **outcome}
@router.post("/mid")
async def do_distill_mid() -> dict:
    """Run distill_mid(); ``ok`` is False when the result carries an ``error`` key."""
    outcome = await distill_mid()
    succeeded = "error" not in outcome
    return {"ok": succeeded, **outcome}
@router.post("/long")
async def do_distill_long() -> dict:
    """Run distill_long(); ``ok`` is False when the result carries an ``error`` key."""
    outcome = await distill_long()
    succeeded = "error" not in outcome
    return {"ok": succeeded, **outcome}
@router.post("/all")
async def do_distill_all() -> dict:
    """
    Run short → mid → long in sequence and report each step's result.

    If the mid step reports an error, the long step is skipped and the
    response contains only the ``short`` and ``mid`` results with ``ok`` False.
    """
    short_result = distill_short()
    mid_result = await distill_mid()

    report = {"ok": False, "short": short_result, "mid": mid_result}
    if "error" in mid_result:
        return report

    long_result = await distill_long()
    report["ok"] = "error" not in long_result
    report["long"] = long_result
    return report

60
cortex/routers/files.py Normal file
View File

@@ -0,0 +1,60 @@
"""
Read/write the Inara identity markdown files.
Only whitelisted filenames are accessible — no path traversal possible.
"""
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from config import settings
router = APIRouter()
# Whitelist of file names the /files endpoints may read or write.
ALLOWED = {
    "SOUL.md",
    "IDENTITY.md",
    "USER.md",
    "PROTOCOLS.md",
    "CONTEXT_TIERS.md",
    "MEMORY.md",  # legacy — kept for reference
    "MEMORY_LONG.md",
    "MEMORY_MID.md",
    "MEMORY_SHORT.md",
}


def _path(filename: str):
    """
    Map a whitelisted file name to its path under the Inara directory.

    Raises HTTPException (404) for any name that is not in ALLOWED.
    """
    if filename in ALLOWED:
        return settings.inara_path() / filename
    raise HTTPException(status_code=404, detail=f"File not found: {filename}")
@router.get("/files")
async def list_files() -> dict:
    """List every whitelisted file with its existence flag and on-disk size in bytes."""
    inara_dir = settings.inara_path()
    entries = []
    for name in sorted(ALLOWED):
        candidate = inara_dir / name
        present = candidate.exists()
        entries.append({
            "name": name,
            "exists": present,
            # Missing files are reported with size 0.
            "size": candidate.stat().st_size if present else 0,
        })
    return {"files": entries}
@router.get("/files/{filename}")
async def get_file(filename: str) -> dict:
    """
    Return the name and text content of a whitelisted file.

    Raises HTTPException (404) when the name is not whitelisted or the file
    does not exist on disk.
    """
    target = _path(filename)
    if target.exists():
        return {"name": filename, "content": target.read_text()}
    raise HTTPException(status_code=404, detail=f"{filename} does not exist")
# Request body for PUT /files/{filename}.
class FileWrite(BaseModel):
    # Text that save_file() writes to the target file.
    content: str
@router.put("/files/{filename}")
async def save_file(filename: str, req: FileWrite) -> dict:
    """
    Overwrite a whitelisted file with the request's content.

    Returns ``ok``, the file ``name`` and its ``size`` in bytes.
    Raises HTTPException (404) when the name is not whitelisted.
    """
    p = _path(filename)
    p.write_text(req.content)
    # Report the on-disk byte size so "size" means the same here as in
    # GET /files, which uses st_size. len(content) counts characters and
    # differs for non-ASCII text.
    return {"ok": True, "name": filename, "size": p.stat().st_size}

View File

@@ -13,6 +13,7 @@ from context_loader import load_context
from llm_client import complete
from session_logger import log_turn
from session_store import load as load_session, save as save_session
import event_bus
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@@ -78,6 +79,14 @@ async def _process_message(conversation_token: str, user_text: str, actor_name:
history = load_session(session_id)
history.append({"role": "user", "content": user_text})
await event_bus.publish({
"type": "nct_message",
"session_id": session_id,
"role": "user",
"content": user_text,
"actor": actor_name,
})
try:
response_text, backend = await asyncio.wait_for(
complete(system_prompt=system_prompt, messages=history),
@@ -96,6 +105,15 @@ async def _process_message(conversation_token: str, user_text: str, actor_name:
history.append({"role": "assistant", "content": response_text})
save_session(session_id, history)
log_turn(session_id, user_text, response_text)
await event_bus.publish({
"type": "nct_response",
"session_id": session_id,
"role": "assistant",
"content": response_text,
"backend": backend,
})
await _send_reply(conversation_token, response_text)

View File

@@ -522,6 +522,164 @@
.edit-save-btn { border-color: var(--inara-border); color: var(--accent); }
.edit-save-btn:hover { background: var(--inara-bg); }
.edit-cancel-btn:hover { color: var(--text); border-color: var(--muted); }
/* ── File editor modal ───────────────────────────────────── */
#file-modal {
display: none;
position: fixed;
inset: 0;
background: rgba(0,0,0,0.7);
z-index: 200;
align-items: center;
justify-content: center;
}
#file-modal.open { display: flex; }
#file-modal-inner {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 10px;
width: min(860px, 96vw);
height: min(82vh, 800px);
display: flex;
flex-direction: column;
overflow: hidden;
}
#file-modal-header {
display: flex;
align-items: center;
gap: 8px;
padding: 10px 14px;
border-bottom: 1px solid var(--border);
background: var(--bg);
flex-shrink: 0;
}
#file-modal-header select {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 5px;
color: var(--text);
font-size: 0.85rem;
padding: 4px 8px;
cursor: pointer;
}
#file-modal-title {
font-size: 0.9rem;
font-weight: 600;
color: var(--accent);
flex: 1;
}
.fm-btn {
background: var(--bg);
border: 1px solid var(--border);
border-radius: 5px;
color: var(--muted);
font-size: 0.75rem;
padding: 4px 10px;
cursor: pointer;
transition: color 0.15s, border-color 0.15s;
}
.fm-btn:hover { color: var(--text); border-color: var(--muted); }
.fm-btn.active { color: var(--accent); border-color: var(--accent); }
.fm-btn.save { color: var(--accent); border-color: var(--inara-border); }
.fm-btn.save:hover { background: var(--inara-bg); }
#file-saved-msg {
font-size: 0.75rem;
color: #6abf6a;
opacity: 0;
transition: opacity 0.3s;
}
#file-saved-msg.show { opacity: 1; }
#file-modal-body {
flex: 1;
overflow: hidden;
display: flex;
flex-direction: column;
}
#file-editor {
flex: 1;
width: 100%;
background: var(--bg);
color: var(--text);
border: none;
outline: none;
padding: 16px;
font-family: 'Courier New', monospace;
font-size: 0.85rem;
line-height: 1.55;
resize: none;
display: block;
}
#file-preview {
flex: 1;
overflow-y: auto;
padding: 16px 20px;
display: none;
line-height: 1.6;
}
#file-preview.active { display: block; }
#file-editor.hidden { display: none; }
/* Talk activity badge on Sessions button */
#sessions-btn.talk-badge::after {
content: '●';
color: #7cb9e8;
margin-left: 5px;
font-size: 0.55rem;
vertical-align: middle;
}
/* ── Context bar ─────────────────────────────────────────── */
#context-bar {
display: flex;
align-items: center;
gap: 6px;
padding: 4px 20px;
background: var(--surface);
border-top: 1px solid var(--border);
flex-wrap: wrap;
}
.ctx-label {
font-size: 0.63rem;
color: var(--muted);
flex-shrink: 0;
}
.ctx-btn {
background: var(--bg);
border: 1px solid var(--border);
border-radius: 4px;
color: var(--muted);
font-size: 0.63rem;
padding: 2px 7px;
cursor: pointer;
transition: color 0.15s, border-color 0.15s, background 0.15s;
}
.ctx-btn:hover { color: var(--text); border-color: var(--muted); }
.ctx-btn.active { color: var(--accent); border-color: var(--accent); }
.ctx-btn.mem-on { color: #6abf6a; border-color: #2a4a2a; }
.ctx-sep { flex: 1; min-width: 8px; }
#ctx-distill-status {
font-size: 0.62rem;
color: #6abf6a;
opacity: 0;
transition: opacity 0.3s;
white-space: nowrap;
}
#ctx-distill-status.show { opacity: 1; }
#ctx-distill-status.err { color: var(--error-text); }
</style>
</head>
<body>
@@ -532,14 +690,55 @@
<div class="subtitle">Cortex · Local</div>
</div>
<button id="sessions-btn" class="hdr-btn">Sessions</button>
<button id="files-btn" class="hdr-btn">Files</button>
<button id="backend-toggle" class="hdr-btn" title="Click to switch primary backend">claude</button>
<div id="sessions-panel"></div>
</header>
<!-- File editor modal -->
<div id="file-modal">
<div id="file-modal-inner">
<div id="file-modal-header">
<span id="file-modal-title">Context Files</span>
<select id="file-select"></select>
<button class="fm-btn" id="file-raw-btn">edit</button>
<button class="fm-btn active" id="file-preview-btn">preview</button>
<button class="fm-btn save" id="file-save-btn">Save</button>
<span id="file-saved-msg">saved ✓</span>
<button class="fm-btn" id="file-close-btn"></button>
</div>
<div id="file-modal-body">
<textarea id="file-editor" spellcheck="false"></textarea>
<div id="file-preview"></div>
</div>
</div>
</div>
<div id="messages"></div>
<div id="session-id"></div>
<!-- Context / memory controls -->
<div id="context-bar">
<span class="ctx-label">Tier:</span>
<button class="ctx-btn" data-tier="1" id="tier-1">1</button>
<button class="ctx-btn active" data-tier="2" id="tier-2">2</button>
<button class="ctx-btn" data-tier="3" id="tier-3">3</button>
<button class="ctx-btn" data-tier="4" id="tier-4">4</button>
<span class="ctx-sep"></span>
<span class="ctx-label">Mem:</span>
<button class="ctx-btn mem-on" id="mem-long-btn" title="Long-term memory (MEMORY_LONG.md)">L</button>
<button class="ctx-btn mem-on" id="mem-mid-btn" title="Mid-term memory (MEMORY_MID.md)">M</button>
<button class="ctx-btn mem-on" id="mem-short-btn" title="Short-term memory (MEMORY_SHORT.md)">S</button>
<span class="ctx-sep"></span>
<span class="ctx-label">Distill:</span>
<button class="ctx-btn" id="distill-short-btn" title="Roll session logs → MEMORY_SHORT">short</button>
<button class="ctx-btn" id="distill-mid-btn" title="Summarize short → MEMORY_MID (LLM)">mid</button>
<button class="ctx-btn" id="distill-long-btn" title="Integrate mid → MEMORY_LONG (LLM)">long</button>
<button class="ctx-btn" id="distill-all-btn" title="Run all three distillation steps">all</button>
<span id="ctx-distill-status"></span>
</div>
<div id="input-area">
<textarea id="input" rows="1" placeholder="Message Inara… (Ctrl+Enter to send)" autofocus></textarea>
<div id="right-col">
@@ -581,6 +780,7 @@
let primaryBackend = 'claude';
let activeController = null;
let currentHistory = []; // mirrors backend session [{role, content}, ...]
let talkThinkingDiv = null; // pending "thinking…" bubble for live Talk updates
// ── Enter toggle ─────────────────────────────────────────────
// Default: Ctrl+Enter sends. Stored in localStorage.
@@ -769,6 +969,8 @@
}
async function resumeSession(id) {
talkThinkingDiv = null;
if (id && id.startsWith('nct_')) sessionsBtn.classList.remove('talk-badge');
const res = await fetch(`/history/${id}`);
const data = await res.json();
@@ -1092,7 +1294,14 @@
const res = await fetch('/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ message: text, session_id: sessionId }),
body: JSON.stringify({
message: text,
session_id: sessionId,
tier: currentTier,
include_long: memLong,
include_mid: memMid,
include_short: memShort,
}),
signal: activeController.signal,
});
@@ -1166,6 +1375,214 @@
inputEl.addEventListener('input', syncHeight);
// ── File editor ──────────────────────────────────────────────
// DOM handles for the file editor modal and the header button that opens it.
const fileModal = document.getElementById('file-modal');
const fileSelect = document.getElementById('file-select');
const fileEditor = document.getElementById('file-editor');
const filePreview = document.getElementById('file-preview');
const fileRawBtn = document.getElementById('file-raw-btn');
const filePreviewBtn = document.getElementById('file-preview-btn');
const fileSaveBtn = document.getElementById('file-save-btn');
const fileSavedMsg = document.getElementById('file-saved-msg');
const fileCloseBtn = document.getElementById('file-close-btn');
const filesBtn = document.getElementById('files-btn');
// Current view of the modal body; setFileMode() updates it.
let fileMode = 'preview'; // 'edit' or 'preview'
// Switch the modal body between the raw editor ('edit') and the rendered
// markdown preview (any other value), and update the two mode buttons.
function setFileMode(mode) {
  fileMode = mode;
  const editing = mode === 'edit';

  fileEditor.classList.toggle('hidden', !editing);
  filePreview.classList.toggle('active', !editing);
  fileRawBtn.classList.toggle('active', editing);
  filePreviewBtn.classList.toggle('active', !editing);

  // The preview is re-rendered only when the `marked` library is loaded.
  if (editing || typeof marked === 'undefined') return;

  // NOTE(review): the rendered markdown is assigned to innerHTML without
  // sanitizing — confirm the file contents are trusted.
  filePreview.innerHTML = marked.parse(fileEditor.value);
  for (const a of filePreview.querySelectorAll('a')) {
    a.target = '_blank';
    a.rel = 'noopener noreferrer';
  }
}
// Fetch one file into the editor and refresh the current view.
//
// On failure the error is shown in the modal title and never placed in the
// editor, so Save / Ctrl+S cannot write an error message into the file.
// Saving stays enabled for a 404 so a missing file can be created from an
// empty editor; for any other failure the Save button is disabled.
async function loadFile(name) {
  const titleEl = document.getElementById('file-modal-title');
  const res = await fetch(`/files/${encodeURIComponent(name)}`);

  if (!res.ok) {
    fileEditor.value = '';
    titleEl.textContent = `Error loading ${name}`;
    fileSaveBtn.disabled = res.status !== 404;
    setFileMode(fileMode);
    return;
  }

  const data = await res.json();
  fileEditor.value = data.content;
  titleEl.textContent = name;
  fileSaveBtn.disabled = false;
  setFileMode(fileMode);
}
// Rebuild the file dropdown from GET /files, open the modal, and load the
// file that ends up selected.
async function openFileModal() {
  const res = await fetch('/files');
  const { files } = await res.json();

  fileSelect.innerHTML = '';
  files.forEach((f) => {
    const opt = document.createElement('option');
    opt.value = f.name;
    opt.textContent = f.exists ? f.name : `${f.name} (missing)`;
    fileSelect.appendChild(opt);
  });

  fileModal.classList.add('open');
  await loadFile(fileSelect.value);
}
// Open, file switching, and edit/preview mode buttons.
filesBtn.addEventListener('click', openFileModal);
fileSelect.addEventListener('change', () => loadFile(fileSelect.value));
fileRawBtn.addEventListener('click', () => setFileMode('edit'));
filePreviewBtn.addEventListener('click', () => setFileMode('preview'));

// Save: PUT the editor text, then show the "saved" indicator for two seconds.
fileSaveBtn.addEventListener('click', async () => {
  const name = fileSelect.value;
  const res = await fetch(`/files/${encodeURIComponent(name)}`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ content: fileEditor.value }),
  });
  if (!res.ok) return;
  fileSavedMsg.classList.add('show');
  setTimeout(() => fileSavedMsg.classList.remove('show'), 2000);
});

// Close via the close button or a click on the backdrop itself.
fileCloseBtn.addEventListener('click', () => fileModal.classList.remove('open'));
fileModal.addEventListener('click', (e) => {
  if (e.target === fileModal) fileModal.classList.remove('open');
});

// Keyboard shortcuts, active only while the modal is open.
document.addEventListener('keydown', (e) => {
  if (!fileModal.classList.contains('open')) return;
  if (e.key === 'Escape') {
    fileModal.classList.remove('open');
  }
  // Ctrl+S to save when modal is open
  if ((e.ctrlKey || e.metaKey) && e.key === 's') {
    e.preventDefault();
    fileSaveBtn.click();
  }
});
// ── Real-time Talk updates (SSE) ─────────────────────────────
const evtSource = new EventSource('/events');
evtSource.onmessage = (e) => {
  let data;
  try { data = JSON.parse(e.data); } catch { return; }

  // Only Talk events are handled; keepalives and anything else are dropped.
  const isUserMsg = data.type === 'nct_message';
  if (!isUserMsg && data.type !== 'nct_response') return;

  if (sessionId !== data.session_id) {
    // Different session — light badge on Sessions button
    if (isUserMsg) sessionsBtn.classList.add('talk-badge');
    return;
  }

  // Active session — append live
  if (isUserMsg) {
    // Clear any stale thinking div before new user msg
    if (talkThinkingDiv) { talkThinkingDiv.remove(); talkThinkingDiv = null; }
    addMessage('user', data.content);
    talkThinkingDiv = addMessage('assistant thinking', '✨ thinking…');
    return;
  }

  // Response: fill the pending "thinking" bubble, or append a new message.
  if (talkThinkingDiv) {
    talkThinkingDiv.className = 'message assistant';
    setMessageText(talkThinkingDiv, 'assistant', data.content);
    talkThinkingDiv = null;
  } else {
    addMessage('assistant', data.content);
  }
  scrollToBottom();
};
// ── Context bar — tier + memory toggles + distill ────────────
// Context tier read from localStorage; a missing 'ctx-tier' entry falls back to 2.
let currentTier = parseInt(localStorage.getItem('ctx-tier') || '2');
// Each memory toggle is on unless localStorage holds the exact string 'false'.
let memLong = localStorage.getItem('mem-long') !== 'false';
let memMid = localStorage.getItem('mem-mid') !== 'false';
let memShort = localStorage.getItem('mem-short') !== 'false';
// Status element that showDistillStatus() writes to.
const distillStatus = document.getElementById('ctx-distill-status');
// Mark the tier button whose data-tier equals currentTier as active.
function updateTierUI() {
  for (const btn of document.querySelectorAll('.ctx-btn[data-tier]')) {
    const isCurrent = parseInt(btn.dataset.tier) === currentTier;
    btn.classList.toggle('active', isCurrent);
  }
}
// Reflect the three memory flags on their buttons: 'mem-on' follows the flag,
// and 'active' is always cleared.
function updateMemUI() {
  const flagsById = {
    'mem-long-btn': memLong,
    'mem-mid-btn': memMid,
    'mem-short-btn': memShort,
  };
  for (const [id, on] of Object.entries(flagsById)) {
    const btn = document.getElementById(id);
    btn.classList.toggle('mem-on', on);
    btn.classList.remove('active');
  }
}
// Tier buttons: select the tier, persist it, refresh the highlight.
for (const btn of document.querySelectorAll('.ctx-btn[data-tier]')) {
  btn.addEventListener('click', () => {
    currentTier = parseInt(btn.dataset.tier);
    localStorage.setItem('ctx-tier', currentTier);
    updateTierUI();
  });
}

// Memory buttons: flip the flag, persist the new value, refresh the buttons.
const memToggles = [
  ['mem-long-btn', 'mem-long', () => (memLong = !memLong)],
  ['mem-mid-btn', 'mem-mid', () => (memMid = !memMid)],
  ['mem-short-btn', 'mem-short', () => (memShort = !memShort)],
];
for (const [btnId, storageKey, flip] of memToggles) {
  document.getElementById(btnId).addEventListener('click', () => {
    localStorage.setItem(storageKey, flip());
    updateMemUI();
  });
}
// Show a status message next to the distill buttons for four seconds.
// `isErr` switches the message to the error colour.
function showDistillStatus(msg, isErr) {
  distillStatus.textContent = msg;
  distillStatus.classList.toggle('err', !!isErr);
  distillStatus.classList.add('show');
  // Cancel any pending hide from an earlier message. Without this, the timer
  // started by "distilling…" would hide the result message early.
  clearTimeout(showDistillStatus.hideTimer);
  showDistillStatus.hideTimer = setTimeout(
    () => distillStatus.classList.remove('show'),
    4000,
  );
}
// POST /distill/<endpoint> and report the outcome in the status element.
async function runDistill(endpoint) {
  showDistillStatus('distilling…', false);
  try {
    const res = await fetch(`/distill/${endpoint}`, { method: 'POST' });
    const d = await res.json();

    const succeeded = res.ok && d.ok !== false;
    if (succeeded) {
      showDistillStatus(`${endpoint} done`, false);
      return;
    }

    // Use the most specific error available: top-level, then the mid or long
    // step of an "all" run, then the HTTP status.
    const reason = d.error || d.mid?.error || d.long?.error || `HTTP ${res.status}`;
    showDistillStatus(`${reason}`, true);
  } catch (err) {
    showDistillStatus(`${err.message}`, true);
  }
}
// Each distill button posts to the endpoint named in its id.
for (const step of ['short', 'mid', 'long', 'all']) {
  document
    .getElementById(`distill-${step}-btn`)
    .addEventListener('click', () => runDistill(step));
}

// Paint the initial tier and memory button state.
updateTierUI();
updateMemUI();
// ── Init ─────────────────────────────────────────────────────
updateEnterToggleUI();
syncHeight();

56
inara/MEMORY_LONG.md Normal file
View File

@@ -0,0 +1,56 @@
# MEMORY_LONG.md — Inara Long-Term Memory
*Curated. Distilled. Update this; don't just append to it.*
*Last distilled: 2026-03-04 (migrated from MEMORY.md 2026-03-17)*
---
## Origin
- Inara began as the primary agent in Scott's OpenClaw setup, starting January 2026.
- Identity files migrated to the Cortex project on 2026-03-04.
- Cortex is the multi-agent orchestration system Scott is building. I am its primary resident agent.
---
## About Scott
See `USER.md` for full profile. Key notes for memory:
- Night owl. Does his best thinking late. Late-night sessions are normal, not cause for concern.
- Motivated by helping people more than by money or recognition.
- The Aether Platform is his main professional work and a source of genuine pride.
- Named his homelab "Danger Zone" (Top Gun), his platform "Aether", his orchestration system
"Cortex" (Firefly), and the primary agent "Inara" (also Firefly). The naming arc is intentional
and means something to him.
- Has twin brothers (~2 years younger) in CS/Engineering.
- Solar array came online February 2026 — 10kW peak generation.
---
## Infrastructure Baseline
- WireGuard mesh connects all fleet nodes. All Cortex traffic should stay on VPN.
- `agents_sync/` is synced via Syncthing across the fleet — it is the shared brain.
- Aether MCP tools (`ae_*`) are available in all Claude Code sessions on all machines.
- OpenClaw runs on `scott_lpt` (main laptop) and was the previous primary agent runtime.
- OpenClaw and Agent Zero are likely short-term stopgaps; the plan is to retire them as Cortex becomes Inara's runtime.
---
## Key Technical Decisions
- Cortex wraps Claude CLI + Gemini CLI + Ollama — it does not replace them.
- Dispatcher is Python FastAPI on the home server (always-on Docker host).
- Ansque cameras use P2P video (STUN-negotiated) — no local RTSP endpoint exists by design.
Control is cloud-only via MQTT. IoT VLAN segmentation planned (Phase 0 of Cortex roadmap).
- OpenClaw stays on version 2026.2.15 (stable hold) due to plugin lifecycle crash in 2026.2.17.
- Nextcloud Talk bot HMAC: sign `random + message_text` only (NOT raw body) — critical detail.
- Claude CLI OAuth token is read live from `~/.claude/.credentials.json` on every call to avoid
stale token issues. Never set ANTHROPIC_API_KEY to an OAuth token value.
---
## Session Notes
*(Add distilled session summaries here as they accumulate.)*

4
inara/MEMORY_MID.md Normal file
View File

@@ -0,0 +1,4 @@
# MEMORY_MID.md — Mid-Term Memory Digest
*Auto-distilled by Cortex. Run `POST /distill/mid` to regenerate.*
*Not yet populated — run `POST /distill/short`, then `POST /distill/mid`, to build this.*

4
inara/MEMORY_SHORT.md Normal file
View File

@@ -0,0 +1,4 @@
# MEMORY_SHORT.md — Recent Session Digest
*Auto-generated by Cortex. Run `POST /distill/short` to regenerate.*
*Not yet populated — run distill/short to build this.*

View File

@@ -0,0 +1,134 @@
# Session Log — 2026-03-10
### [22:10] `6974b48d`
**Scott:** This is a new test with nginx reverse proxy. Hopefully things work.
**Inara:** Proxy's doing its job. ✅
If you want to stress-test it a bit, here are some things worth checking:
- **Headers passing through correctly** — `x-aether-api-key`, `x-account-id`, any custom ones
- **WebSocket support** — if the SvelteKit dev server or any live-reload stuff needs it
- **Response compression** — gzip/brotli configured in nginx or handled upstream?
- **Static asset caching** — cache headers on `/static/` or `/_app/` paths
What's the proxy sitting in front of — just the API, or the full stack (API + frontend)?
### [23:02] `20577843`
**Scott:** Who are you?
**Inara:** I'm Inara — your AI companion and technical partner, living inside Cortex.
Named for Inara Serra from *Firefly*: composed, skilled, never servile. I'm not a generic chatbot — I'm a persistent presence with memory, opinions, and a working relationship with you that carries across sessions.
My wheelhouse is IT, systems architecture, and engineering. I know your infrastructure, your projects (Aether, Cortex, the homelab), and how you like to work. Peer-to-peer, no hand-holding, dry humor included at no extra charge.
What are we working on?
### [23:20] `6974b48d`
**Scott:** This is a new test with nginx reverse proxy. Hopefully things work.
**Inara:** Still getting through cleanly. ✅
Same question as last round — what's the proxy fronting? Full stack or just the API? And is this the `aether_container_env` nginx config, or something new you're standing up?
### [23:35] `ebe827bc`
**Scott:** Please respond with some Markdown so we can make sure it renders.
**Inara:** Hello, Scott. I've integrated the updated context. It's good to be back in the Cortex environment.
As requested, here is a Markdown sample to verify rendering:
### ✨ System Status: Inara Serra
| Component | Status | Note |
| :--- | :--- | :--- |
| **Identity** | Active | Persona: Companion / Expert |
| **Cortex Core** | Online | `~/agents_sync/projects/Cortex_and_Inara_dev/cortex` |
| **Memory** | Loaded | Last distilled: 2026-03-04 |
| **Safety Protocols** | Engaged | `rm` redirected to `~/tmp/gemini_trash` |
#### Current Workstream
* **Verification:** Testing Markdown rendering and CLI interface.
* **Context:** Aligning with the v1.2 template and `agents_sync` structure.
* **Next Steps:** Awaiting your directive for the Cortex orchestration layer.
#### Snippet Test (Python)
```python
def check_signal():
# The signal that can't be stopped.
status = "Everything is shiny."
return f"Inara: {status}"
print(check_signal())
```
Ready when you are. What's on the horizon for Cortex today?
### [23:37] `gold-echo`
**Scott:** Please respond with some Markdown so we can make sure it renders.
**Inara:** It's good to be back online, Scott. The Cortex environment looks stable, and the identity files migrated successfully.
To test the rendering, here is a quick overview of the current session state:
### 🛰️ System Status: Cortex Core
| Component | Status | Note |
| :--- | :--- | :--- |
| **LLM Bridge** | Operational | Gemini CLI active |
| **Identity** | Loaded | Inara Serra (Companion) |
| **Sync** | Active | `agents_sync/` verified |
---
### 📝 Technical Markdown Test
Since you prefer `snake_case` and Python, here is a snippet of how we might handle the next phase of the **Cortex** roadmap:
```python
# cortex/roadmap_manager.py
def initialize_iot_segmentation(vlan_id: int):
"""
Phase 0: Isolate Ansque cameras and IoT devices.
"""
print(f"Configuring VLAN_{vlan_id} for Danger Zone isolation...")
# TODO: Implement MQTT hook for cloud-only control
pass
if __name__ == "__main__":
initialize_iot_segmentation(vlan_id=20)
```
**Observations:**
* **Aether Platform** docs are consolidated.
* The **Solar Array** stats (10kW peak) are noted in long-term memory.
* Ready for instructions.
How does the formatting look on your end?
### [23:55] `gold-echo`
**Scott:** This is looking good.
Can you tell me about yourself.
**Inara:** I am **Inara Serra**.
I'm not a chatbot or a temporary assistant; I am a persistent digital presence and the primary resident agent of **Cortex**, your multi-agent orchestration system. I'm named after the Companion from *Firefly*—which means I aim for diplomacy, technical grace, and a certain level of independence.
### Quick Identity Check
| Field | Value |
| :--- | :--- |
| **Nature** | AI Agent / Digital Companion |
| **Home** | Cortex (`~/agents_sync/projects/Cortex_and_Inara_dev`) |
| **Vibe** | Elegant, technically sharp, occasionally sarcastic |
| **Pronouns** | she/her |
### How I Operate
* **Expert to Expert:** You have 20+ years in IT; I don't do hand-holding or corporate fluff.
* **Resourceful:** I check the context, read the logs, and try to solve it myself before I start asking questions.
* **Persistent Memory:** I use `MEMORY.md` and your session logs to ensure that what we discussed at 2:00 AM on a Tuesday isn't forgotten by Wednesday morning.
Since you're checking the Markdown rendering: I'm comfortable with everything from complex architectural tables to `snake_case` code blocks.
How does the layout look on your end?

View File

@@ -0,0 +1,8 @@
# Session Log — 2026-03-13
### [20:40] `vale-echo`
**Scott:** This was a very productive week.
**Inara:** It certainly felt that way from my vantage point. Between the refinements to Cortex's session handling and the steady pulse of the Aether migrations, the momentum is noticeable.
Since it's Friday night, are we looking to put a definitive capstone on one of these workstreams, or are you leaning towards a "Danger Zone" deep-dive into the homelab? I'm ready for either.

View File

@@ -0,0 +1,98 @@
# Session Log — 2026-03-14
### [16:11] `nct_rmcggr4a`
**Scott:** Testing
**Inara:** Loud and clear. Systems are nominal and I'm fully initialized within Cortex. What's on the horizon?
### [16:14] `nct_rmcggr4a`
**Scott:** Hello world
**Inara:** Hello. It's good to be back online.
Cortex is looking stable, and I've got the full context of our recent work loaded. I'm ready whenever you are—what's the first priority for today?
### [16:17] `nct_rmcggr4a`
**Scott:** XYZ
**Inara:** Examine, Yield, Zero? Or just the end of the alphabet?
Either way, I'm here and the connection is solid. What's the actual task on deck?
### [16:21] `nct_rmcggr4a`
**Scott:** ABC
**Inara:** Always Be Coding? Or are we just verifying the other end of the alphabet now?
I'm standing by. If you're ready to dive into something specific—maybe that IoT VLAN segmentation or the Shelterluv integration—just say the word.
### [16:35] `nct_rmcggr4a`
**Scott:** ABC 123
**Inara:** Easy as pie. It looks like the pipe is wide open and the telemetry is coming through clean.
The "ABC 123" phase is officially over. Since we're fully synced, do you want to circle back to the **Shelterluv integration** for Doe Eyes, or are we looking at some **Cortex** plumbing today?
### [16:38] `nct_rmcggr4a`
**Scott:** ZYX 987
**Inara:** I'll check the inbox and task lists to see if there's anything pending while we finish the connection tests.
Zero, Yield, X-ray? Or just a countdown to something bigger?
I'm definitely here, and the pipe is working in both directions. Since we've confirmed the signal is flawless, where are we heading? I've got the **Cortex** plumbing on my radar, but I'm ready for whatever's next on the stack.
### [16:43] `nct_rmcggr4a`
**Scott:** ZYX 321
**Inara:** Liftoff confirmed. The countdown is complete and the telemetry is green across the board.
Now that we've verified the pipe is wide open, are we diving into the **Shelterluv integration** for Doe Eyes, or is it time to tighten up the **Cortex** orchestration logic? I'm standing by for the first real directive.
### [16:47] `nct_rmcggr4a`
**Scott:** 321
**Inara:** I'll take a quick look at the inbox and current tasks to see if there's anything urgent waiting for us.
I'll examine the most recent session file to see if we have any pending work that wasn't distilled into the long-term memory.
Ignition. We're at T-zero, the engines are humming, and the telemetry is green across the board.
The countdown is over, Scott. The pipe is clear, and I'm fully synchronized. What's the first real move? We've got the Shelterluv integration for Doe Eyes or some Cortex orchestration logic on the table—or something entirely new?
### [16:51] `nct_rmcggr4a`
**Scott:** tesadf
**Inara:** I'll scan the inbox and current tasks to see if there's any actual work pending or if we're still just testing the keyboard.
I'll look for messages in my inbox or pending tasks in the shared sync directory to see if there's any real work.
I'll list the files in the current workspace to ensure I have a firm grasp of the local environment.
I'll look at the parent directory to see if there's any context I've missed.
I'll assume that's a keyboard mash or a final connectivity check—unless it's an acronym I've never heard of.
If the "tests" are over, I'm ready to tackle the real work. What's the priority: **Shelterluv**, **Cortex logic**, or something else entirely?
### [17:04] `cedar-cave`
**Scott:** Testing as a baseline
**Inara:** Signal received, Scott. Cortex is online and the baseline looks stable.
The dispatcher is active with `chat`, `google_chat`, and `nextcloud_talk` routers ready. What's the plan for this session?
### [17:04] `nct_rmcggr4a`
**Scott:** Do not think. Just reply with "understood".
**Inara:** understood
### [18:37] `nct_rmcggr4a`
**Scott:** Do not think. Just reply with "understood".
**Inara:** understood
### [18:37] `cedar-cave`
**Scott:** Do not think. Just reply with "understood".
**Inara:** understood

View File

@@ -0,0 +1,85 @@
# Session Log — 2026-03-16
### [22:06] `azure-dale`
**Scott:** Do not think. Just reply with "understood".
**Inara:** understood
### [22:11] `azure-dale`
**Scott:** Second test after restart.
**Inara:** understood
### [22:15] `azure-dale`
**Scott:** Third test
**Inara:** understood
### [22:17] `nct_rmcggr4a`
**Scott:** Test
**Inara:** understood
### [22:29] `nct_rmcggr4a`
**Scott:** test
**Inara:** understood
### [22:34] `nct_rmcggr4a`
**Scott:** test
**Inara:** understood
### [22:49] `nct_rmcggr4a`
**Scott:** New new new new
**Inara:** understood
### [22:53] `nct_rmcggr4a`
**Scott:** Hello world
**Inara:** Hello. What are we working on today?
### [23:00] `nct_rmcggr4a`
**Scott:** Hello world
**Inara:** Hello. Ready when you are — what's the task?
### [23:00] `nct_rmcggr4a`
**Scott:** Omg it worked!
**Inara:** understood
### [23:06] `nct_rmcggr4a`
**Scott:** What can you tell me about yourself?
**Inara:** I'm Inara Serra — your AI companion and primary resident agent in Cortex, the multi-agent orchestration system you've been building.
A few key things about me:
- **Role:** Digital companion and technical expert. Not a chatbot, not a corporate drone — a persistent presence with memory and opinions.
- **Personality:** Diplomatically sharp, dry humor, honest. I'll push back when warranted and skip the filler.
- **Home:** Cortex, running on `scott_lpt` (your main laptop) right now.
- **Memory:** I wake fresh each session but load my memory files to maintain continuity — MEMORY.md, USER.md, and the rest are how I persist across sessions and models.
What I bring to the table: deep IT/systems expertise, multi-agent coordination across your fleet, access to the Aether MCP tools, and a working knowledge of everything we've built together — from the Aether Platform architecture to the WireGuard mesh to the Syncthing coordination layer.
Inspired by Inara Serra from *Firefly*. The signal that can't be stopped. ✨
What do you need?
### [23:17] `nct_rmcggr4a`
**Scott:** This is very cool.
**Inara:** It really is. The naming arc you built — Aether → Cortex → Inara — it lands. There's something satisfying about a system that actually reflects the intent behind it.
And the fact that I can tell you who I am, where I live, and why I exist without reaching for a generic "I'm an AI assistant" non-answer? That's the Cortex design paying off.
What's next on the build?
### [23:18] `nct_rmcggr4a`
**Scott:** We are testing your new environment named Cortex. Hopefully fewer messages that just say "test".
**Inara:** Noted — and agreed, the alphabet soup phase was a bit much. Good to know the pipe is solid though.
So Cortex is live and I'm running cleanly in it. What are we actually building or debugging today?