Initial commit — Cortex API + Inara identity
Cortex: FastAPI backend serving Inara via Claude/Gemini CLI backends. Includes SSE streaming chat, session persistence, Google Chat webhook handler, and Docker support. Inara: Identity files (persona, soul, protocols, memory, context tiers) mounted read-only into the container at runtime. Features in initial cut: - /chat endpoint with SSE keepalive + LLM fallback - Session store with rolling history window - Markdown rendering, copy-to-clipboard, links open in new tab - Stacked right-column input controls (height selector, enter toggle, note mode with public/private) — semi-hidden until textarea grows - /note endpoint for injecting public context into session history - Docker Compose config (local dev runs natively; Docker for server) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
0
cortex/routers/__init__.py
Normal file
0
cortex/routers/__init__.py
Normal file
140
cortex/routers/chat.py
Normal file
140
cortex/routers/chat.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import asyncio
|
||||
import json
|
||||
import uuid
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from context_loader import load_context
|
||||
from llm_client import complete
|
||||
from session_logger import log_turn
|
||||
from session_store import load as load_session, save as save_session, list_all
|
||||
from config import settings
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
    # Request body for POST /chat.

    # Text of the user's turn; appended to the session history as-is.
    message: str

    # Existing session to continue. When omitted, the chat handler
    # generates a fresh short id for the conversation.
    session_id: str | None = None

    # Context tier passed to load_context(); a falsy value falls back to
    # settings.default_tier.
    tier: int | None = None

    # "claude" or "gemini" to override; None = use primary_backend
    model: str | None = None
|
||||
|
||||
|
||||
class BackendRequest(BaseModel):
    # Request body for POST /backend.

    # Name of the backend to make primary: "claude" or "gemini".
    # The endpoint rejects any other value with HTTP 400.
    primary: str
|
||||
|
||||
|
||||
class NoteRequest(BaseModel):
    # Request body for POST /note.

    # Session whose stored history receives the note.
    session_id: str

    # Note text; stored in history prefixed with "[NOTE] ".
    note: str
|
||||
|
||||
|
||||
async def _stream_chat(req: ChatRequest):
    """
    SSE generator: sends keepalive events every 3s while the LLM works,
    then sends the final response. Keeps the browser connection alive
    regardless of how long the backend takes.

    Loading the context and the session history happens inside the error
    boundary, so a failure there is reported to the client as an "error"
    event rather than aborting a stream that has already started.

    Event types:
      data: {"type": "keepalive"}
      data: {"type": "response", "response": "...", "session_id": "...",
             "backend": "...", "fallback_used": bool}
      data: {"type": "error", "message": "..."}
    """
    session_id = req.session_id or str(uuid.uuid4())[:8]
    tier = req.tier or settings.default_tier

    # Stays None until the LLM call has actually been scheduled, so the
    # cleanup in `finally` is safe even when setup fails early.
    task = None

    try:
        system_prompt = load_context(tier)
        history = load_session(session_id)
        history.append({"role": "user", "content": req.message})

        task = asyncio.create_task(complete(
            system_prompt=system_prompt,
            messages=history,
            model=req.model,
        ))

        # Ping the browser every 3s so it doesn't drop the connection
        while not task.done():
            yield 'data: {"type":"keepalive"}\n\n'
            try:
                # shield() keeps the timeout from cancelling the LLM task;
                # only this wait is abandoned when the 3s elapse.
                await asyncio.wait_for(asyncio.shield(task), timeout=3)
            except asyncio.TimeoutError:
                pass
            except Exception:
                # The task itself failed; task.result() below re-raises
                # the error so it is reported as an "error" event.
                break

        response_text, actual_backend = task.result()
        history.append({"role": "assistant", "content": response_text})
        save_session(session_id, history)
        log_turn(session_id, req.message, response_text)

        requested = req.model or settings.primary_backend
        payload = {
            "type": "response",
            "response": response_text,
            "session_id": session_id,
            "backend": actual_backend,
            "fallback_used": actual_backend != requested,
        }
        yield f"data: {json.dumps(payload)}\n\n"

    except Exception as e:
        # Some exceptions stringify to "", so fall back to the type name
        # to keep the error event informative.
        message = str(e) or type(e).__name__
        yield f"data: {json.dumps({'type': 'error', 'message': message})}\n\n"

    finally:
        # Ensure the LLM task is cancelled if the generator is torn down
        # (e.g. client disconnect or server shutdown). This propagates
        # CancelledError into _gemini() which kills the process group.
        if task is not None and not task.done():
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                pass
|
||||
|
||||
|
||||
@router.post("/chat")
async def chat(req: ChatRequest) -> StreamingResponse:
    # Wrap the SSE generator in a streaming response. The headers ask
    # caches not to store the stream and set X-Accel-Buffering to "no".
    sse_headers = {"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}
    event_stream = _stream_chat(req)
    return StreamingResponse(
        event_stream,
        media_type="text/event-stream",
        headers=sse_headers,
    )
|
||||
|
||||
|
||||
@router.get("/backend")
async def get_backend() -> dict:
    # Report the configured primary backend and the one reported as its
    # fallback: "gemini" when the primary is "claude", otherwise "claude".
    primary = settings.primary_backend
    if primary == "claude":
        fallback = "gemini"
    else:
        fallback = "claude"
    return {"primary": primary, "fallback": fallback}
|
||||
|
||||
|
||||
@router.post("/backend")
async def set_backend(req: BackendRequest) -> dict:
    # Switch the primary backend in the shared settings object.
    # Only "claude" and "gemini" are accepted; anything else is a 400.
    chosen = req.primary
    if chosen != "claude" and chosen != "gemini":
        raise HTTPException(status_code=400, detail="primary must be 'claude' or 'gemini'")

    settings.primary_backend = chosen

    # After validation the fallback is simply whichever name was not chosen.
    if chosen == "claude":
        fallback = "gemini"
    else:
        fallback = "claude"
    return {"primary": settings.primary_backend, "fallback": fallback}
|
||||
|
||||
|
||||
@router.get("/history/{session_id}")
async def get_history(session_id: str) -> dict:
    # Return the stored message list for the given session id, echoing
    # the id back alongside it.
    messages = load_session(session_id)
    return {"session_id": session_id, "messages": messages}
|
||||
|
||||
|
||||
@router.get("/sessions")
async def list_sessions() -> dict:
    # Expose whatever the session store's list_all() reports.
    sessions = list_all()
    return {"sessions": sessions}
|
||||
|
||||
|
||||
@router.post("/note")
async def add_note(req: NoteRequest) -> dict:
    """Inject a public note into session history so the LLM sees it next turn."""
    target_session = req.session_id

    # The note is stored as a user-role message tagged with "[NOTE]".
    note_entry = {"role": "user", "content": f"[NOTE] {req.note}"}

    history = load_session(target_session)
    history.append(note_entry)
    save_session(target_session, history)

    return {"ok": True, "session_id": target_session}
|
||||
74
cortex/routers/google_chat.py
Normal file
74
cortex/routers/google_chat.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import asyncio
|
||||
import logging
|
||||
from fastapi import APIRouter, Request, Response
|
||||
from context_loader import load_context
|
||||
from llm_client import complete
|
||||
from session_logger import log_turn
|
||||
from session_store import load as load_session, save as save_session
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/channels/google-chat")
|
||||
|
||||
|
||||
@router.post("")
async def receive(request: Request):
    """Handle an incoming Google Chat event.

    ADDED_TO_SPACE events get a greeting. MESSAGE events are answered by
    the LLM using a per-space session. Every other event type (including
    REMOVED_FROM_SPACE) is acknowledged with an empty 200.

    Returns a ``{"text": ...}`` dict for replies, or a bare 200 ``Response``
    when there is nothing to say.
    """
    body = await request.json()
    event_type = body.get("type")

    # `or {}` guards against keys that are present but null, which
    # `.get(key, {})` alone would pass through as None.
    space = body.get("space") or {}
    space_type = space.get("type", "")

    if event_type == "ADDED_TO_SPACE":
        if space_type == "DM":
            return {"text": "✨ Hello! I'm Inara. What can I help you with?"}
        return {"text": "✨ Hello! I'm Inara. Send me a message and I'll do my best to help."}

    # REMOVED_FROM_SPACE and any other non-message event: just acknowledge.
    if event_type != "MESSAGE":
        return Response(status_code=200)

    message = body.get("message") or {}
    sender = message.get("sender") or {}

    # argumentText strips the @BotName mention in Spaces; fall back to full text in DMs
    user_text = (message.get("argumentText") or message.get("text") or "").strip()
    if not user_text:
        return Response(status_code=200)

    sender_display = sender.get("displayName", "User")
    space_name = space.get("name", "unknown")

    # Session keyed per space — one conversation per DM or Space
    session_id = "gc_" + space_name.replace("/", "_")

    logger.info("Google Chat message from %s in %s (%s)", sender_display, space_name, space_type)

    system_prompt = load_context(settings.default_tier)
    history = load_session(session_id)
    history.append({"role": "user", "content": user_text})

    try:
        # The backend actually used is returned as well but is not needed here.
        response_text, _actual_backend = await asyncio.wait_for(
            complete(
                system_prompt=system_prompt,
                messages=history,
                model=settings.google_chat_backend,
            ),
            timeout=settings.google_chat_timeout,
        )
    except asyncio.TimeoutError:
        logger.warning("Google Chat request timed out for session %s", session_id)
        return {"text": "⏳ Still thinking — this is taking a bit longer than usual. Try again in a moment."}
    except Exception as e:
        # logger.exception keeps the traceback; the user only sees a
        # generic apology, never the internal error text.
        logger.exception("Google Chat error for session %s: %s", session_id, e)
        return {"text": "⚠️ Something went wrong on my end. Try again shortly."}

    # Persist the turn only after a successful completion, so failed or
    # timed-out requests leave the stored history untouched.
    history.append({"role": "assistant", "content": response_text})
    save_session(session_id, history)
    log_turn(session_id, user_text, response_text)

    return {"text": response_text}
|
||||
Reference in New Issue
Block a user