Initial commit — Cortex API + Inara identity

Cortex: FastAPI backend serving Inara via Claude/Gemini CLI backends. Includes SSE streaming chat, session persistence, Google Chat webhook handler, and Docker support.

Inara: Identity files (persona, soul, protocols, memory, context tiers) mounted read-only into the container at runtime.

Features in initial cut:
- /chat endpoint with SSE keepalive + LLM fallback
- Session store with rolling history window
- Markdown rendering, copy-to-clipboard, links open in new tab
- Stacked right-column input controls (height selector, enter toggle, note mode with public/private) — semi-hidden until textarea grows
- /note endpoint for injecting public context into session history
- Docker Compose config (local dev runs natively; Docker for server)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-06 03:41:00 -05:00
commit 2f675ee4bf
27 changed files with 2282 additions and 0 deletions
--- a/cortex/routers/__init__.py
+++ b/cortex/routers/__init__.py
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -0,0 +1,140 @@
+import asyncio
+import json
+import uuid
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from context_loader import load_context
+from llm_client import complete
+from session_logger import log_turn
+from session_store import load as load_session, save as save_session, list_all
+from config import settings
+
+
+# Router collecting the chat, backend, history, session-list and note
+# endpoints defined below; no path prefix is set on it here.
+router = APIRouter()
+
+
+# Request body for POST /chat.
+class ChatRequest(BaseModel):
+    # The user's message for this turn; appended to the session history.
+    message: str
+    # Session to continue. A falsy value (None or "") makes _stream_chat
+    # generate a fresh 8-character ID.
+    session_id: str | None = None
+    # Context tier handed to load_context. A falsy value (None or 0) falls
+    # back to settings.default_tier.
+    tier: int | None = None
+    model: str | None = None  # "claude" or "gemini" to override; None = use primary_backend
+
+
+# Request body for POST /backend. The value is validated by the endpoint
+# itself, which rejects anything other than "claude" or "gemini" with a 400.
+class BackendRequest(BaseModel):
+    primary: str  # "claude" or "gemini"
+
+
+# Request body for POST /note.
+class NoteRequest(BaseModel):
+    # Session whose history receives the note.
+    session_id: str
+    # Note text; stored in history as a user message prefixed with "[NOTE] ".
+    note: str
+
+
+async def _stream_chat(req: ChatRequest):
+    """
+    SSE generator: sends keepalive events every 3s while the LLM works,
+    then sends the final response. Keeps the browser connection alive
+    regardless of how long the backend takes.
+
+    Failures are always reported in-band as an "error" event, whether they
+    come from loading the context/history, from the LLM call, or from
+    persisting the finished turn. The session is only saved when the LLM
+    call succeeds.
+
+    Event types:
+      data: {"type": "keepalive"}
+      data: {"type": "response", "response": "...", "session_id": "...",
+              "backend": "...", "fallback_used": bool}
+      data: {"type": "error", "message": "..."}
+    """
+    session_id = req.session_id or str(uuid.uuid4())[:8]
+    tier = req.tier or settings.default_tier
+
+    # A streaming response has already begun by the time this generator body
+    # runs, so an exception raised here cannot turn into an HTTP error status;
+    # it would just cut the stream off. Report setup failures as an SSE error
+    # event instead, matching how LLM failures are reported below.
+    try:
+        system_prompt = load_context(tier)
+        history = load_session(session_id)
+    except Exception as e:
+        yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
+        return
+
+    history.append({"role": "user", "content": req.message})
+
+    task = asyncio.create_task(complete(
+        system_prompt=system_prompt,
+        messages=history,
+        model=req.model,
+    ))
+
+    try:
+        # Ping the browser every 3s so it doesn't drop the connection
+        while not task.done():
+            yield 'data: {"type":"keepalive"}\n\n'
+            try:
+                # shield() keeps the wait_for timeout from cancelling the
+                # LLM task itself; only the 3s wait is abandoned.
+                await asyncio.wait_for(asyncio.shield(task), timeout=3)
+            except asyncio.TimeoutError:
+                pass
+            except Exception:
+                # The task failed; task.result() below re-raises the same
+                # exception so it is reported as an error event.
+                break
+
+        try:
+            response_text, actual_backend = task.result()
+            history.append({"role": "assistant", "content": response_text})
+            save_session(session_id, history)
+            log_turn(session_id, req.message, response_text)
+
+            requested = req.model or settings.primary_backend
+            payload = {
+                "type": "response",
+                "response": response_text,
+                "session_id": session_id,
+                "backend": actual_backend,
+                "fallback_used": actual_backend != requested,
+            }
+            yield f"data: {json.dumps(payload)}\n\n"
+
+        except Exception as e:
+            yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
+
+    finally:
+        # Ensure the LLM task is cancelled if the generator is torn down
+        # (e.g. client disconnect or server shutdown). This propagates
+        # CancelledError into _gemini() which kills the process group.
+        if not task.done():
+            task.cancel()
+            try:
+                await task
+            except (asyncio.CancelledError, Exception):
+                pass
+
+
+@router.post("/chat")
+async def chat(req: ChatRequest) -> StreamingResponse:
+    """Stream the reply to a chat message as server-sent events."""
+    # Ask clients not to cache the stream and ask proxies that honour
+    # X-Accel-Buffering (e.g. nginx) not to buffer it.
+    sse_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
+    event_stream = _stream_chat(req)
+    return StreamingResponse(
+        event_stream,
+        media_type="text/event-stream",
+        headers=sse_headers,
+    )
+
+
+@router.get("/backend")
+async def get_backend() -> dict:
+    """Return the configured primary backend and the other one as fallback."""
+    if settings.primary_backend == "claude":
+        fallback = "gemini"
+    else:
+        fallback = "claude"
+    return {"primary": settings.primary_backend, "fallback": fallback}
+
+
+@router.post("/backend")
+async def set_backend(req: BackendRequest) -> dict:
+    """Switch the primary backend at runtime.
+
+    Raises a 400 HTTPException unless ``req.primary`` is "claude" or
+    "gemini". On success, returns the new primary and the other backend as
+    fallback.
+    """
+    # Each backend name maps to the one that is reported as its fallback.
+    counterpart = {"claude": "gemini", "gemini": "claude"}
+    chosen = req.primary
+    if chosen not in counterpart:
+        raise HTTPException(status_code=400, detail="primary must be 'claude' or 'gemini'")
+    settings.primary_backend = chosen
+    return {"primary": settings.primary_backend, "fallback": counterpart[chosen]}
+
+
+@router.get("/history/{session_id}")
+async def get_history(session_id: str) -> dict:
+    """Return the stored messages for ``session_id``."""
+    messages = load_session(session_id)
+    return {"session_id": session_id, "messages": messages}
+
+
+@router.get("/sessions")
+async def list_sessions() -> dict:
+    """Return whatever the session store lists, under the "sessions" key."""
+    sessions = list_all()
+    return {"sessions": sessions}
+
+
+@router.post("/note")
+async def add_note(req: NoteRequest) -> dict:
+    """Append a public note to a session's history.
+
+    The note is stored as a user message prefixed with "[NOTE] ", so it is
+    part of the history sent to the LLM on the next turn.
+    """
+    sid = req.session_id
+    messages = load_session(sid)
+    note_entry = {"role": "user", "content": f"[NOTE] {req.note}"}
+    messages.append(note_entry)
+    save_session(sid, messages)
+    return {"ok": True, "session_id": sid}
--- a/cortex/routers/google_chat.py
+++ b/cortex/routers/google_chat.py
@@ -0,0 +1,74 @@
+import asyncio
+import logging
+from fastapi import APIRouter, Request, Response
+from context_loader import load_context
+from llm_client import complete
+from session_logger import log_turn
+from session_store import load as load_session, save as save_session
+from config import settings
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Every route declared on this router lives under /channels/google-chat.
+router = APIRouter(prefix="/channels/google-chat")
+
+
+@router.post("")
+async def receive(request: Request):
+    """Handle a Google Chat webhook event and reply synchronously.
+
+    Behaviour by event type:
+      * ADDED_TO_SPACE: returns a greeting (a shorter one for DMs).
+      * MESSAGE: runs the text through the LLM and returns its reply.
+      * anything else, including REMOVED_FROM_SPACE: empty 200 response.
+
+    Conversation history is kept per space: the session ID is "gc_" plus the
+    space name with "/" replaced by "_". The turn is only saved when the LLM
+    call succeeds; on timeout or error a short apology text is returned.
+    """
+    # NOTE(review): nothing in this handler authenticates the caller. Confirm
+    # that the request is verified as coming from Google Chat somewhere
+    # upstream (middleware / proxy) before exposing this endpoint publicly.
+    body = await request.json()
+    event_type = body.get("type")
+
+    # `or {}` / `or ""` guards below cover keys that are present but null,
+    # which a plain .get(key, default) would pass through as None.
+    space = body.get("space") or {}
+    space_type = space.get("type", "")
+
+    if event_type == "ADDED_TO_SPACE":
+        greeting = "✨ Hello! I'm Inara. Send me a message and I'll do my best to help."
+        if space_type == "DM":
+            greeting = "✨ Hello! I'm Inara. What can I help you with?"
+        return {"text": greeting}
+
+    # REMOVED_FROM_SPACE and any other non-message event need no reply.
+    if event_type != "MESSAGE":
+        return Response(status_code=200)
+
+    message = body.get("message") or {}
+    sender = message.get("sender") or {}
+
+    # argumentText strips the @BotName mention in Spaces; fall back to full text in DMs
+    user_text = (message.get("argumentText") or message.get("text") or "").strip()
+    if not user_text:
+        return Response(status_code=200)
+
+    sender_display = sender.get("displayName", "User")
+    space_name = space.get("name") or "unknown"
+
+    # Session keyed per space — one conversation per DM or Space
+    session_id = "gc_" + space_name.replace("/", "_")
+
+    logger.info("Google Chat message from %s in %s (%s)", sender_display, space_name, space_type)
+
+    try:
+        # Loading context/history is inside the try so that a failure there
+        # also produces the friendly reply below rather than an HTTP 500.
+        system_prompt = load_context(settings.default_tier)
+        history = load_session(session_id)
+        history.append({"role": "user", "content": user_text})
+
+        response_text, _backend = await asyncio.wait_for(
+            complete(
+                system_prompt=system_prompt,
+                messages=history,
+                model=settings.google_chat_backend,
+            ),
+            timeout=settings.google_chat_timeout,
+        )
+    except asyncio.TimeoutError:
+        logger.warning("Google Chat request timed out for session %s", session_id)
+        return {"text": "⏳ Still thinking — this is taking a bit longer than usual. Try again in a moment."}
+    except Exception:
+        # logger.exception records the traceback along with the message.
+        logger.exception("Google Chat error for session %s", session_id)
+        return {"text": "⚠️ Something went wrong on my end. Try again shortly."}
+
+    history.append({"role": "assistant", "content": response_text})
+    save_session(session_id, history)
+    log_turn(session_id, user_text, response_text)
+
+    return {"text": response_text}