feat: SSH dev routing, model registry UX, chat input toolbar, doc sync
Backend / infrastructure:
- cortex/tools/_projects.py (new): shared project alias registry with ssh_host
for workstation projects (aether_api, aether_frontend, aether_container)
- cortex/tools/git.py: all git tools route to workstation via SSH when ssh_host set
- cortex/tools/aider.py: aider_run SSH-routes to workstation using bash -l -c
- cortex/routers/local_llm.py: POST /api/models/{id}/edit AJAX endpoint — save
model edits without page reload or tab reset; returns JSON {ok, label, model_name}
- cortex/llm_client.py: remove Gemini CLI and Claude CLI backends; clean up
fallback chain and process group tracking (continuation of Gemini CLI removal)
- cortex/routers/auth.py: strip Claude/Gemini CLI auth status checks (CLI removed)
- cortex/routers/chat.py: remove legacy claude/gemini backend fields
- cortex/config.py: clean up CLI-related settings
- cortex/main.py: remove CLI lifecycle hooks
UI:
- cortex/static/local_llm.html: model edit forms now save via fetch() + toast;
stay on Models tab; update row header label in place on success
- cortex/static/index.html: restructure input area to column layout — textarea
above, compact toolbar below (Chat/Tools/Attach + Send); fixes dead space at
M/L/XL sizes; context panel "Role" → "Model" section label
- cortex/static/style.css: column input-area layout; #input-toolbar; flex:1 →
width:100% on textarea (fixes scrollHeight in column flex context); compact
send/stop button padding
- cortex/static/app.js: add XL (720px) to height cycle; default M (240px)
Docs:
- cortex/static/HELP.md: S/M/L → S/M/L/XL; add Rebuild to distill table; fix
"Role selector" references (no such UI); fix "your active role" → Chat role;
fix ⚡ toggle description; Model Registry section cleanup
- documentation/ARCH__BACKENDS.md: reflect CLI removal, current backend state
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
anthropic_api_key: str | None = None # not used — claude CLI handles auth
|
||||
anthropic_api_key: str | None = None # not used — configure via model registry
|
||||
|
||||
# Google OAuth — "Sign in with Google" for all users
|
||||
# Create credentials at console.cloud.google.com → APIs & Services → Credentials
|
||||
@@ -38,7 +38,6 @@ class Settings(BaseSettings):
|
||||
default_model: str = "claude-sonnet-4-6"
|
||||
default_tier: int = 2
|
||||
max_history_messages: int = 40 # rolling window — 20 turns (user + assistant)
|
||||
primary_backend: str = "claude" # "claude" | "local" — gemini CLI removed June 2026
|
||||
|
||||
# Local model backend — OpenAI-compatible API (Open WebUI / Ollama)
|
||||
# Set LOCAL_API_URL in .env to enable; leave blank to disable
|
||||
@@ -46,9 +45,6 @@ class Settings(BaseSettings):
|
||||
local_api_key: str = "" # sk-... from Open WebUI → Settings → Account → API Keys
|
||||
local_model: str = "" # workspace or model name, e.g. test-agent-simple
|
||||
|
||||
# Per-backend timeouts in seconds
|
||||
timeout_claude: int = 60
|
||||
timeout_gemini: int = 120 # frequently slow under load
|
||||
timeout_local: int = 300 # local models may need to load first
|
||||
|
||||
# Auto-distillation schedule — override in .env
|
||||
@@ -66,14 +62,13 @@ class Settings(BaseSettings):
|
||||
distill_backend_long: str = ""
|
||||
|
||||
# Model registry: default backend type per role when user registry has no entry.
|
||||
# Values: "claude_cli" | "gemini_cli" | "gemini_api" (builtin IDs)
|
||||
# Override in .env: ROLE_CHAT=claude_cli ROLE_DISTILL=gemini_api etc.
|
||||
role_chat: str = "claude_cli"
|
||||
role_orchestrator: str = "gemini_api"
|
||||
role_distill: str = "claude_cli"
|
||||
role_janitor: str = "claude_cli" # assign a cheap/fast model: Haiku 4.5, local Gemma E4B
|
||||
role_coder: str = "claude_cli"
|
||||
role_research: str = "gemini_api"
|
||||
# All roles must be configured via /settings/models — no built-in fallback.
|
||||
role_chat: str = ""
|
||||
role_orchestrator: str = ""
|
||||
role_distill: str = ""
|
||||
role_janitor: str = ""
|
||||
role_coder: str = ""
|
||||
role_research: str = ""
|
||||
|
||||
# Comma-separated list of standard roles shown in the model settings UI.
|
||||
# Add custom roles here to extend the UI without code changes.
|
||||
@@ -122,8 +117,8 @@ class Settings(BaseSettings):
|
||||
return [r.strip() for r in self.defined_roles.split(",") if r.strip()]
|
||||
|
||||
def get_role_default(self, role: str) -> str:
|
||||
"""Return the .env default backend type for a role (e.g. 'claude_cli')."""
|
||||
return getattr(self, f"role_{role.replace('-', '_')}", "claude_cli")
|
||||
"""Return the .env default backend type for a role, or '' if unconfigured."""
|
||||
return getattr(self, f"role_{role.replace('-', '_')}", "")
|
||||
|
||||
def home_root(self) -> Path:
|
||||
"""Resolve home_dir relative to this file's location if not absolute."""
|
||||
|
||||
@@ -1,50 +1,18 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
from config import settings
|
||||
import event_bus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Track active Gemini process group IDs so we can kill them on shutdown
|
||||
_active_pgroups: set[int] = set()
|
||||
|
||||
|
||||
def _register_pgroup(pid: int) -> None:
    """Add *pid* to the set of process groups that `cleanup` kills at shutdown."""
    _active_pgroups.add(pid)
|
||||
|
||||
|
||||
def _unregister_pgroup(pid: int) -> None:
    """Stop tracking *pid*; a pid that is not currently tracked is ignored."""
    _active_pgroups.discard(pid)
|
||||
|
||||
|
||||
async def cleanup() -> None:
    """Kill any lingering Gemini process groups. Call from lifespan shutdown.

    Every tracked process group is sent SIGKILL. A group that has already
    exited is skipped silently; any other OS-level failure (for example a
    permission error) is logged and skipped, so a single bad entry cannot
    stop the remaining groups from being killed or the tracking set from
    being cleared.
    """
    # Iterate over a snapshot so the set itself can be emptied afterwards.
    for pgid in list(_active_pgroups):
        try:
            os.killpg(pgid, signal.SIGKILL)
        except ProcessLookupError:
            continue  # group already gone — nothing to do
        except OSError as err:
            logger.warning("Shutdown: could not kill Gemini process group %d: %s", pgid, err)
            continue
        logger.info("Shutdown: killed Gemini process group %d", pgid)
    _active_pgroups.clear()
|
||||
|
||||
|
||||
# Map from registry model type → dispatch function key
|
||||
_TYPE_TO_BACKEND = {
|
||||
"claude_cli": "claude",
|
||||
"gemini_cli": "gemini", # Gemini CLI is being replaced by Antigravity CLI (June 2026)
|
||||
"gemini_api": "gemini", # routes to CLI subprocess — no users configured; kept for compat
|
||||
"local_openai": "local",
|
||||
"anthropic_api": "anthropic_api",
|
||||
}
|
||||
|
||||
# Explicit UI toggle values (kept for backward compat)
|
||||
_EXPLICIT_BACKENDS = ("claude", "gemini", "local")
|
||||
# Gemini CLI removed from the claude fallback — it's shutting down June 18 2026.
|
||||
# claude failures now surface directly; gemini backend still falls back to claude.
|
||||
_FALLBACK: dict[str, str | None] = {"claude": None, "gemini": "claude", "local": "claude", "anthropic_api": "claude"}
|
||||
_FALLBACK: dict[str, str | None] = {
|
||||
"local": None,
|
||||
"anthropic_api": None,
|
||||
}
|
||||
|
||||
|
||||
async def complete(
|
||||
@@ -55,16 +23,15 @@ async def complete(
|
||||
slot: str | None = None,
|
||||
max_tokens: int = 2048,
|
||||
attachment: dict | None = None,
|
||||
token_sink=None, # async (str) -> None; if set, stream tokens as they arrive
|
||||
token_sink=None,
|
||||
) -> tuple[str, str]:
|
||||
"""
|
||||
Returns (response_text, actual_backend_used).
|
||||
|
||||
slot: Phase 3 — specific role slot ("primary" | "backup_1" | "backup_2").
|
||||
Resolves that exact slot, no fallback chain. Takes priority over model.
|
||||
model: legacy backend override ("claude" | "gemini" | "local") from old toggle.
|
||||
None = resolve via model registry for the given role.
|
||||
role: registry role used for slot/auto routing (default: "chat").
|
||||
slot: explicit role slot ("primary" | "backup_1" | "backup_2").
|
||||
Resolves that exact slot, no fallback chain. Takes priority over role.
|
||||
role: registry role used for auto routing (default: "chat").
|
||||
model: ignored — kept for API compatibility; routing is via slot/role only.
|
||||
"""
|
||||
import model_registry as _reg
|
||||
from persona import _user
|
||||
@@ -73,46 +40,33 @@ async def complete(
|
||||
resolved_cfg: dict | None = None
|
||||
|
||||
if slot is not None:
|
||||
# Phase 3: explicit slot selection — no fallback within the role
|
||||
resolved_cfg = _reg.get_model_for_slot(username, role, slot)
|
||||
if resolved_cfg:
|
||||
primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "claude")
|
||||
primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "local")
|
||||
else:
|
||||
# Slot not configured — fall through to auto routing
|
||||
slot = None
|
||||
|
||||
if slot is None:
|
||||
if model in _EXPLICIT_BACKENDS:
|
||||
# Legacy: explicit backend override from old UI toggle
|
||||
if model == "local":
|
||||
resolved_cfg = _reg.get_best_local_model(username, role)
|
||||
if not resolved_cfg:
|
||||
raise RuntimeError("No local model configured — add one at /settings/models")
|
||||
primary = model
|
||||
else:
|
||||
# Auto: role-based routing via model registry
|
||||
resolved = _reg.get_model_for_role(username, role)
|
||||
if resolved:
|
||||
resolved_cfg = resolved
|
||||
primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude")
|
||||
primary = _TYPE_TO_BACKEND.get(resolved["type"], "local")
|
||||
else:
|
||||
primary = settings.primary_backend
|
||||
raise RuntimeError(
|
||||
f"No model configured for role '{role}'. "
|
||||
"Add one at /settings/models."
|
||||
)
|
||||
|
||||
fallback = _FALLBACK.get(primary, "claude")
|
||||
fallback = _FALLBACK.get(primary)
|
||||
|
||||
try:
|
||||
response = await _dispatch(primary, system_prompt, messages, resolved_cfg,
|
||||
attachment=attachment, token_sink=token_sink)
|
||||
return response, primary
|
||||
except Exception as e:
|
||||
err_str = str(e)
|
||||
if primary == "claude" and any(k in err_str for k in ("401", "authenticate", "expired", "OAuth")):
|
||||
await event_bus.publish({"type": "claude_auth_expired"})
|
||||
# Surface errors when a model is explicitly configured or a specific slot was pinned.
|
||||
if resolved_cfg is not None:
|
||||
logger.error("%s failed (no fallback — model explicitly configured): %s", primary, e)
|
||||
raise
|
||||
# No fallback defined for this backend — surface the error directly.
|
||||
if not fallback:
|
||||
logger.error("%s failed (no fallback configured): %s", primary, e)
|
||||
raise
|
||||
@@ -129,9 +83,7 @@ async def _dispatch(
|
||||
attachment: dict | None = None,
|
||||
token_sink=None,
|
||||
) -> str:
|
||||
if backend == "gemini":
|
||||
text = await _gemini(system_prompt, messages)
|
||||
elif backend == "local":
|
||||
if backend == "local":
|
||||
if token_sink:
|
||||
return await _local_streaming(token_sink, system_prompt, messages, model_cfg)
|
||||
text = await _local(system_prompt, messages, model_cfg, attachment=attachment)
|
||||
@@ -140,55 +92,12 @@ async def _dispatch(
|
||||
return await _anthropic_api_streaming(token_sink, system_prompt, messages, model_cfg)
|
||||
text = await _anthropic_api(system_prompt, messages, model_cfg)
|
||||
else:
|
||||
text = await _claude(system_prompt, messages, model_cfg)
|
||||
# For non-streaming backends when token_sink is provided, emit the full text as one chunk.
|
||||
raise RuntimeError(f"Unknown backend '{backend}' — check model type in registry")
|
||||
if token_sink and text:
|
||||
await token_sink(text)
|
||||
return text
|
||||
|
||||
|
||||
def _fresh_claude_token() -> str | None:
|
||||
"""Read the current OAuth access token from the Claude credentials file.
|
||||
|
||||
The token in the systemd .env goes stale (it rotates on each login).
|
||||
Reading directly from ~/.claude/.credentials.json always gets the latest.
|
||||
"""
|
||||
import json as _json
|
||||
creds_path = os.path.expanduser("~/.claude/.credentials.json")
|
||||
try:
|
||||
with open(creds_path) as f:
|
||||
data = _json.load(f)
|
||||
return data["claudeAiOauth"]["accessToken"]
|
||||
except Exception as e:
|
||||
logger.debug("Could not read Claude credentials file: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
async def _claude(system_prompt: str, messages: list[dict], model_cfg: dict | None) -> str:
    """Run the Claude CLI once in ``--print`` mode and return its text output.

    The system prompt is passed via ``--system-prompt``; the conversation is
    passed as the final positional argument.
    """
    requested_model = model_cfg.get("model_name") if model_cfg else None

    argv = ["claude", "--print", "--no-session-persistence", "--output-format", "text"]
    # Only pass --model if it's a real model name (not a backend type string)
    backend_type_names = ("claude", "gemini", "local", "")
    if requested_model and requested_model not in backend_type_names:
        argv += ["--model", requested_model]
    if system_prompt:
        argv += ["--system-prompt", system_prompt]
    argv.append(_build_conversation(messages))

    # Prefer the token from the credentials file so the systemd service keeps
    # working after the env-var token rotates on login.
    child_env = dict(os.environ)
    oauth_token = _fresh_claude_token()
    if oauth_token:
        child_env["CLAUDE_CODE_OAUTH_TOKEN"] = oauth_token
    # NOTE(review): treated as unconditional ("never let a stale API key
    # override OAuth") — confirm against the original indentation.
    child_env.pop("ANTHROPIC_API_KEY", None)

    return await _run(argv, timeout=settings.timeout_claude, env=child_env)
|
||||
|
||||
|
||||
async def _local(
|
||||
system_prompt: str,
|
||||
messages: list[dict],
|
||||
@@ -413,106 +322,3 @@ async def _local_streaming(
|
||||
return full_text.strip()
|
||||
|
||||
|
||||
async def _gemini(system_prompt: str, messages: list[dict]) -> str:
    """Run the Gemini CLI once and return its cleaned stdout.

    The CLI is started in a new session so the whole process tree shares one
    process group; on timeout or cancellation that group is SIGKILLed in one
    call, which also takes down children that keep the stdout pipe open.

    Raises:
        RuntimeError: the binary is not in PATH, the call timed out, or the
            cleaned output is empty.
    """

    def _kill_tree(pgid: int) -> None:
        # The group may already have exited; that is not an error.
        try:
            os.killpg(pgid, signal.SIGKILL)
        except ProcessLookupError:
            pass

    argv = [
        "gemini",
        "--output-format", "text",
        "--extensions", "",  # disable all extensions — prevents MCP child processes
        "-p", _build_prompt(system_prompt, messages),
    ]

    try:
        proc = await asyncio.create_subprocess_exec(
            *argv,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            start_new_session=True,
        )
    except FileNotFoundError:
        raise RuntimeError("gemini not found in PATH")

    pgid = proc.pid
    _register_pgroup(pgid)
    limit = settings.timeout_gemini
    try:
        out_bytes, _ = await asyncio.wait_for(proc.communicate(), timeout=limit)
        raw = out_bytes.decode()
    except asyncio.TimeoutError:
        _kill_tree(pgid)
        raise RuntimeError(f"Gemini timed out after {limit}s")
    except asyncio.CancelledError:
        _kill_tree(pgid)
        raise
    finally:
        # Always stop tracking, whichever way the call ended.
        _unregister_pgroup(pgid)

    cleaned = _clean_gemini_output(raw)
    if cleaned:
        return cleaned
    raise RuntimeError("Gemini returned an empty response")
|
||||
|
||||
|
||||
# Lines Gemini CLI writes to stdout that are not part of the actual response
|
||||
_GEMINI_NOISE = (
|
||||
"Loaded cached credentials",
|
||||
"Loading extension:",
|
||||
"Server '",
|
||||
"Listening for",
|
||||
"Model is overloaded",
|
||||
"High demand",
|
||||
"Retrying",
|
||||
"retrying",
|
||||
"429",
|
||||
"quota",
|
||||
)
|
||||
|
||||
|
||||
def _clean_gemini_output(text: str) -> str:
|
||||
lines = [
|
||||
line for line in text.splitlines()
|
||||
if not any(line.strip().startswith(p) for p in _GEMINI_NOISE)
|
||||
]
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
|
||||
async def _run(cmd: list[str], timeout: int = 60, env: dict | None = None) -> str:
|
||||
loop = asyncio.get_running_loop()
|
||||
result = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, env=env),
|
||||
)
|
||||
if result.returncode != 0:
|
||||
detail = result.stderr.strip() or result.stdout.strip() or f"exit code {result.returncode}"
|
||||
raise RuntimeError(f"{cmd[0]} failed: {detail}")
|
||||
return result.stdout.strip()
|
||||
|
||||
|
||||
def _build_conversation(messages: list[dict]) -> str:
|
||||
"""Conversation only — used for Claude (system prompt passed separately)."""
|
||||
parts = []
|
||||
prior = messages[:-1]
|
||||
if prior:
|
||||
history_lines = []
|
||||
for msg in prior:
|
||||
label = settings.user_name if msg["role"] == "user" else settings.agent_name
|
||||
history_lines.append(f"{label}: {msg['content']}")
|
||||
parts.append("<conversation>\n" + "\n\n".join(history_lines) + "\n</conversation>")
|
||||
parts.append(messages[-1]["content"] if messages else "")
|
||||
return "\n\n".join(parts)
|
||||
|
||||
|
||||
def _build_prompt(system_prompt: str, messages: list[dict]) -> str:
    """Full prompt with system context embedded — used for Gemini.

    A non-empty system prompt is wrapped in ``<system>`` tags and placed
    before the rendered conversation, separated by a blank line.
    """
    conversation = _build_conversation(messages)
    if not system_prompt:
        return conversation
    return "\n\n".join((f"<system>\n{system_prompt}\n</system>", conversation))
|
||||
|
||||
@@ -18,8 +18,6 @@ async def lifespan(app: FastAPI):
|
||||
scheduler.start()
|
||||
yield
|
||||
scheduler.stop()
|
||||
from llm_client import cleanup
|
||||
await cleanup()
|
||||
|
||||
|
||||
app = FastAPI(title="Cortex Dispatcher", lifespan=lifespan)
|
||||
|
||||
@@ -1,76 +1,12 @@
|
||||
"""
|
||||
CLI auth status for both Claude and Gemini backends.
|
||||
|
||||
GET /auth/status — returns per-backend auth info and warning flags
|
||||
|
||||
Claude: warns when OAuth token is < WARN_HOURS from expiry (requires
|
||||
user to re-run `claude` to refresh via browser flow).
|
||||
Gemini: warns only when oauth_creds.json is missing or has no
|
||||
refresh_token (access token rotates automatically every ~1h).
|
||||
GET /auth/status — returns connectivity status for configured model backends.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from fastapi import APIRouter
|
||||
from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/auth")
|
||||
|
||||
CLAUDE_CREDS = Path.home() / ".claude" / ".credentials.json"
|
||||
GEMINI_CREDS = Path.home() / ".gemini" / "oauth_creds.json"
|
||||
GEMINI_ACCTS = Path.home() / ".gemini" / "google_accounts.json"
|
||||
WARN_HOURS = 24 # no refresh token — warn a day ahead
|
||||
WARN_HOURS_REFRESH = 1 # refresh token present — only warn if CLI hasn't rotated in time
|
||||
|
||||
|
||||
def _claude_status() -> dict:
    """Report Claude OAuth token freshness read from the credentials file.

    Returns a dict with ``ok``, expiry details, and ``warning``/``expired``
    flags. Any failure to read or parse the file yields ``ok: False`` with
    the error text and ``warning: True``.
    """
    try:
        oauth = json.loads(CLAUDE_CREDS.read_text())["claudeAiOauth"]
        refreshable = bool(oauth.get("refreshToken"))
        # expiresAt is divided by 1000 before use as a POSIX timestamp.
        expiry = datetime.fromtimestamp(oauth["expiresAt"] / 1000, tz=timezone.utc)
        hours_left = (expiry - datetime.now(tz=timezone.utc)).total_seconds() / 3600
        is_expired = hours_left <= 0
        # With a refresh token the CLI should rotate the access token itself,
        # so use the tight window: a fresh token does not warn immediately,
        # but one the CLI failed to rotate still surfaces before expiry.
        warn_window = WARN_HOURS_REFRESH if refreshable else WARN_HOURS
        return {
            "ok": True,
            "has_refresh_token": refreshable,
            "access_token_expires_at": expiry.isoformat(),
            "access_token_hours_remaining": round(hours_left, 1),
            "warning": is_expired or hours_left < warn_window,
            "expired": is_expired,
        }
    except Exception as e:
        logger.warning("claude auth check failed: %s", e)
        return {"ok": False, "error": str(e), "warning": True, "expired": False}
|
||||
|
||||
|
||||
def _gemini_status() -> dict:
    """Report whether Gemini OAuth credentials with a refresh token exist.

    A missing credentials file or a missing ``refresh_token`` is reported as
    unauthenticated with a warning. The active account name is looked up on
    a best-effort basis and is ``None`` when it cannot be read.
    """
    unauthenticated = {"ok": True, "authenticated": False, "warning": True, "account": None}
    try:
        has_refresh = bool(json.loads(GEMINI_CREDS.read_text()).get("refresh_token"))
    except FileNotFoundError:
        return unauthenticated
    except Exception as e:
        logger.warning("gemini auth check failed: %s", e)
        return {"ok": False, "error": str(e), "warning": True, "authenticated": False}

    if not has_refresh:
        return unauthenticated

    try:
        active = json.loads(GEMINI_ACCTS.read_text()).get("active")
    except Exception:
        # The account name is informational only; never fail the check on it.
        active = None
    return {"ok": True, "authenticated": True, "warning": False, "account": active}
|
||||
|
||||
|
||||
async def _local_status(username: str = "scott") -> dict:
|
||||
"""Check reachability of the user's configured local model host."""
|
||||
@@ -104,7 +40,5 @@ async def _local_status(username: str = "scott") -> dict:
|
||||
@router.get("/status")
async def auth_status() -> dict:
    """Return the auth/connectivity report for each backend, keyed by name."""
    report = {"claude": _claude_status(), "gemini": _gemini_status()}
    report["local"] = await _local_status()
    return report
|
||||
|
||||
@@ -21,11 +21,7 @@ router = APIRouter()
|
||||
|
||||
|
||||
def _backend_label(backend: str, username: str, role: str = "chat") -> str:
|
||||
"""Human-readable label for the model that handled a request (legacy path)."""
|
||||
if backend == "claude":
|
||||
return "Claude"
|
||||
if backend == "gemini":
|
||||
return "Gemini"
|
||||
"""Human-readable label for the model that handled a request."""
|
||||
if backend == "local":
|
||||
cfg = model_registry.get_best_local_model(username, role)
|
||||
if cfg:
|
||||
@@ -52,7 +48,7 @@ class ChatRequest(BaseModel):
|
||||
message: str
|
||||
session_id: str | None = None
|
||||
tier: int | None = None
|
||||
model: str | None = None # legacy backend override ("claude"|"gemini"|"local")
|
||||
model: str | None = None # ignored — kept for API compatibility
|
||||
slot: str | None = None # Phase 3: explicit slot ("primary"|"backup_1"|"backup_2")
|
||||
chat_role: str = "chat" # active role: "chat"|"coder"|"research"|"distill" etc.
|
||||
include_long: bool = True
|
||||
@@ -64,10 +60,6 @@ class ChatRequest(BaseModel):
|
||||
attachment: Attachment | None = None # image attachment (text files injected client-side)
|
||||
|
||||
|
||||
class BackendRequest(BaseModel):
    # Request body for POST /backend: names the backend to make primary.
    primary: str # "claude", "gemini", or "local"
|
||||
|
||||
|
||||
class NoteRequest(BaseModel):
|
||||
session_id: str
|
||||
note: str
|
||||
@@ -183,9 +175,6 @@ async def _stream_chat(req: ChatRequest):
|
||||
yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
|
||||
|
||||
finally:
|
||||
# Ensure the LLM task is cancelled if the generator is torn down
|
||||
# (e.g. client disconnect or server shutdown). This propagates
|
||||
# CancelledError into _gemini() which kills the process group.
|
||||
if not task.done():
|
||||
task.cancel()
|
||||
try:
|
||||
@@ -203,10 +192,6 @@ async def chat(req: ChatRequest) -> StreamingResponse:
|
||||
)
|
||||
|
||||
|
||||
_BACKEND_CYCLE = ("claude", "gemini", "local")
|
||||
_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
|
||||
|
||||
|
||||
def _request_user(request: Request) -> str | None:
|
||||
"""Extract username from JWT cookie, or None."""
|
||||
try:
|
||||
@@ -216,20 +201,6 @@ def _request_user(request: Request) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _local_model_info(request: Request) -> dict | None:
    """Return the best local model {label, model_name} for the session user, or None.

    None is returned when there is no session user, no local model is
    configured for the "chat" role, or the registry lookup raises.
    """
    user = _request_user(request)
    if user:
        try:
            best = model_registry.get_best_local_model(user, "chat")
            if best:
                return {key: best.get(key, "") for key in ("label", "model_name")}
        except Exception:
            pass  # best-effort lookup — fall through to None
    return None
|
||||
|
||||
|
||||
def _chat_slot_models(username: str) -> list[dict]:
|
||||
"""Return [{slot, label, type}] for each configured slot in the chat role, primary first."""
|
||||
registry = model_registry.get_registry(username)
|
||||
@@ -279,7 +250,6 @@ async def get_backend(request: Request) -> dict:
|
||||
username = _request_user(request)
|
||||
chat_models = _chat_slot_models(username) if username else []
|
||||
available_roles = _available_roles_for_toggle(username) if username else []
|
||||
p = settings.primary_backend
|
||||
|
||||
orch_label = None
|
||||
if username:
|
||||
@@ -288,25 +258,9 @@ async def get_backend(request: Request) -> dict:
|
||||
orch_label = orch_cfg.get("label") or orch_cfg.get("model_name") or None
|
||||
|
||||
return {
|
||||
"chat_models": chat_models, # Phase 3: [{slot, label, type}] for chat-role slots
|
||||
"available_roles": available_roles, # kept for banner + backward compat
|
||||
"chat_models": chat_models,
|
||||
"available_roles": available_roles,
|
||||
"orchestrator_model": orch_label,
|
||||
# Legacy fields kept for backward compat
|
||||
"primary": p,
|
||||
"fallback": _BACKEND_FALLBACK.get(p, "claude"),
|
||||
"local_model": _local_model_info(request),
|
||||
}
|
||||
|
||||
|
||||
@router.post("/backend")
async def set_backend(req: BackendRequest, request: Request) -> dict:
    """Set the primary backend and return the resulting primary/fallback pair.

    Raises:
        HTTPException: 400 when ``req.primary`` is not in ``_BACKEND_CYCLE``.
    """
    chosen = req.primary
    if chosen not in _BACKEND_CYCLE:
        raise HTTPException(status_code=400, detail="primary must be 'claude', 'gemini', or 'local'")
    # Assigns the attribute on the shared settings object.
    settings.primary_backend = chosen
    return dict(
        primary=chosen,
        fallback=_BACKEND_FALLBACK[chosen],
        local_model=_local_model_info(request),
    )
|
||||
|
||||
|
||||
|
||||
@@ -744,6 +744,53 @@ async def remove_custom_role_route(
|
||||
return RedirectResponse("/settings/models#roles", status_code=303)
|
||||
|
||||
|
||||
@router.post("/api/models/{model_id}/edit")
async def edit_model_ajax(
    request: Request,
    model_id: str,
    mtype: str = Form(""),
    label: str = Form(""),
    model_name: str = Form(""),
    context_k: int = Form(0),
    max_rounds: int = Form(0),
    tools: int = Form(1),
    tags: str = Form(""),
    reasoning_budget_tokens: int = Form(0),
    host_id: str = Form(""),
    account_id: str = Form(""),
    credential_id: str = Form("cli"),
) -> JSONResponse:
    """AJAX: edit a model entry. Returns JSON {ok, label, model_name} on success.

    Responds 401 when no user is resolved from the request, and 400 when the
    model name is blank, a ``local_openai`` model has no host, or ``mtype``
    is not a recognised type.

    ``label``, ``model_name`` and ``host_id`` are whitespace-trimmed once up
    front so that the values validated, saved and echoed back are identical.
    """
    username = _get_user(request)
    if not username:
        return JSONResponse({"error": "Not authenticated"}, status_code=401)

    label = label.strip()
    model_name = model_name.strip()
    host_id = host_id.strip()
    if not model_name:
        return JSONResponse({"error": "Model name is required."}, status_code=400)

    tag_list = [t.strip() for t in tags.split(",") if t.strip()]
    # A zero (the form default) for these optional numbers is stored as None.
    max_rounds_ = max_rounds or None
    reasoning_budget_ = reasoning_budget_tokens or None
    tools_bool = tools != 0

    if mtype == "local_openai":
        if not host_id:
            return JSONResponse({"error": "Select a host for this model."}, status_code=400)
        reg.save_model(username, model_id, host_id, label, model_name, context_k, tag_list,
                       max_rounds=max_rounds_, tools=tools_bool,
                       reasoning_budget_tokens=reasoning_budget_)
    elif mtype == "gemini_api":
        reg.save_cloud_model(username, model_id, "google", model_name, label,
                             account_id=account_id or None, context_k=context_k, tags=tag_list,
                             max_rounds=max_rounds_, tools=tools_bool)
    elif mtype in ("claude_cli", "anthropic_api"):
        reg.save_cloud_model(username, model_id, "anthropic", model_name, label,
                             credential_id=credential_id or "cli", context_k=context_k, tags=tag_list,
                             max_rounds=max_rounds_, tools=tools_bool)
    else:
        return JSONResponse({"error": f"Unknown model type: {mtype}"}, status_code=400)

    # The UI updates the row header in place, so fall back to the model name
    # when no label was given.
    display = label or model_name
    logger.info("model edited (ajax): %s / %s (%s)", username, display, mtype)
    return JSONResponse({"ok": True, "label": display, "model_name": model_name})
|
||||
|
||||
|
||||
@router.post("/api/models/role")
|
||||
async def set_role(request: Request) -> JSONResponse:
|
||||
"""AJAX: assign a model to a role priority slot.
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and are appended automatically by help.html when present.
|
||||
-->
|
||||
|
||||
*Last updated: 2026-05-13*
|
||||
*Last updated: 2026-06-18* <!-- input toolbar refactor; XL size added; help doc sync -->
|
||||
|
||||
---
|
||||
|
||||
@@ -44,7 +44,7 @@ The **Context & Memory** panel (sliders icon with tier number) contains all conf
|
||||
| **Memory Layers** | Toggle Long / Mid / Short memory on/off |
|
||||
| **Distill Memory** | Manually trigger Short / Mid / Long / All distillation |
|
||||
| **Model** | Active chat model — click to cycle through your configured slot models (Primary → Backup 1 → …) |
|
||||
| **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L** cycles input area height · **⌃↵** toggles send shortcut |
|
||||
| **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L/XL** cycles input area height · **⌃↵** toggles send shortcut |
|
||||
|
||||
All settings persist in `localStorage` across page refreshes.
|
||||
|
||||
@@ -74,7 +74,7 @@ The orchestrator runs a multi-step tool loop:
|
||||
3. The model produces the final user-facing reply — when the orchestrator role uses Gemini, Claude writes the final response; when it uses a local model, that same model writes it
|
||||
4. Expandable tool-call cards appear above the response — click any card to see the arguments sent and the result returned
|
||||
|
||||
The ⚡ toggle is **independent of the Role selector** — you can use any role (chat, coder, research, etc.) with or without tools. The orchestrator model is configured in **Account → Model Registry → Role Assignments → Orchestrator**.
|
||||
The ⚡ toggle routes requests through the **Orchestrator** role model regardless of which chat model is active. Configure it in **Account → Model Registry → Role Assignments → Orchestrator**.
|
||||
|
||||
Tools mode is best for tasks requiring research, multi-step reasoning, or side effects (e.g. "search for X", "add a task", "what's on my list?", "append this to my journal"). Regular chat is faster for conversational turns.
|
||||
|
||||
@@ -156,7 +156,7 @@ Once installed, opening Cortex from the home screen or app launcher skips the br
|
||||
|
||||
## Switching Models
|
||||
|
||||
The **Model** button in the Context & Memory panel cycles through the slot models configured for your active role (Primary → Backup 1). Click it to switch between models mid-session.
|
||||
The **Model** button in the Context & Memory panel cycles through the slot models configured for your **Chat** role (Primary → Backup 1). Click it to switch between models mid-session.
|
||||
|
||||
- The button label shows the active model (e.g. "GPT-4o", "Gemini 2.5 Flash")
|
||||
- The selected slot is sent with each chat request so the correct model is used
|
||||
@@ -205,12 +205,11 @@ The table shows all-time totals per model key, with columns for:
|
||||
|
||||
Values ≥ 1,000 are displayed as `k` (e.g. `24.3k`).
|
||||
|
||||
**What is and isn't tracked:**
|
||||
**What is tracked:**
|
||||
|
||||
- ✅ Gemini API calls (orchestrator, distillation)
|
||||
- ✅ Anthropic API calls (direct SDK)
|
||||
- ✅ Local OpenAI-compatible calls (Open WebUI, Ollama, OpenRouter)
|
||||
- ✗ Claude CLI — no structured token data is returned by the subprocess
|
||||
- ✗ Gemini CLI — same reason
|
||||
- ✅ Gemini API calls (orchestrator, distillation)
|
||||
|
||||
The raw data lives in `home/{username}/usage.json` and is also accessible via the Files panel or the API.
|
||||
|
||||
@@ -230,9 +229,10 @@ Configure which AI models are available and which handles each task type.
|
||||
|
||||
Do this before adding models — models need a provider account or local host to attach to.
|
||||
|
||||
**Anthropic (Claude):** Two options:
|
||||
- **CLI (OAuth):** Nothing to configure — uses your existing `claude auth login` session. If Claude isn't working, run `claude auth login` in a terminal.
|
||||
- **Direct API key:** Scroll to **Cloud Providers → Anthropic** → click **+ Add API key**. Enter a label and your `sk-ant-…` key from [console.anthropic.com/keys](https://console.anthropic.com/keys). When you add a model using an API key credential, it routes through the Anthropic SDK instead of the CLI.
|
||||
**Anthropic (Claude):** Uses a direct API key — no Claude CLI required:
|
||||
- Scroll to **Cloud Providers → Anthropic** → click **+ Add API key**
|
||||
- Enter a label and your `sk-ant-…` key from [console.anthropic.com/keys](https://console.anthropic.com/keys)
|
||||
- Models added with this credential call the Anthropic API directly via the SDK
|
||||
|
||||
**Google (Gemini):** Add one entry per API key you want to use:
|
||||
1. Scroll to **Cloud Providers → Google** → click **+ Add Google account**
|
||||
@@ -261,7 +261,7 @@ Scroll to **Add Model**. Select the provider tab, fill in the details, click **A
|
||||
|---|---|
|
||||
| **Local** | Select a host (from Step 1) → enter model name, or use **Fetch from host** to pick from a live list |
|
||||
| **Google** | Select a Gemini model from the catalog → select a Google account (from Step 1) |
|
||||
| **Anthropic** | Select a credential (CLI OAuth or an API key added in Step 1) → select a Claude model from the catalog |
|
||||
| **Anthropic** | Select an API key credential (from Step 1) → select a Claude model from the catalog |
|
||||
|
||||
The label and context window size auto-fill from the catalog — edit them if you want. Tags are optional.
|
||||
|
||||
@@ -286,7 +286,7 @@ Scroll to **Role Assignments** at the bottom of the page. Each role has **Primar
|
||||
| **Coder** | Code-focused tasks — larger context window, code-aware model |
|
||||
| **Research** | Long-context research — high-token model, web tools prioritized |
|
||||
|
||||
Switch roles via the **Role** selector in the Context & Memory panel (⚙). Leave all slots empty to use the server default.
|
||||
Leave all slots empty to use the server default.
|
||||
|
||||
**Per-role tool sets:** Expand any role card to configure which tool categories the orchestrator can use when that role is active. Once at least one category is checked, the unchecked categories are hidden from the model entirely — reducing token overhead on every orchestrated call. Leaving every category unchecked (the default) applies no filter: all tools the user has access to remain available.
|
||||
|
||||
@@ -390,6 +390,7 @@ Distillation builds up the memory layers from raw session logs. Runs automatical
|
||||
| **mid** | LLM summarizes `MEMORY_SHORT.md` → `MEMORY_MID.md` |
|
||||
| **long** | LLM integrates `MEMORY_MID.md` → `MEMORY_LONG.md` |
|
||||
| **all** | Runs short → mid → long in sequence |
|
||||
| **Rebuild** | ⚠ Wipes Mid + Long memories and rebuilds from session logs. Use to recover from distillation drift. Hand-edited content will be replaced. |
|
||||
|
||||
**Recommended workflow:** run **short** after any productive session; **mid** weekly; **long** monthly.
|
||||
|
||||
@@ -462,8 +463,7 @@ For direct access or scripting:
|
||||
| Method | Endpoint | Description |
|
||||
|---|---|---|
|
||||
| `POST` | `/chat` | Send a message — returns SSE stream |
|
||||
| `GET` | `/backend` | Get current primary/fallback backends |
|
||||
| `POST` | `/backend` | Set primary backend (`{"primary": "claude"}`) |
|
||||
| `GET` | `/backend` | Get configured model slots and orchestrator |
|
||||
| `GET` | `/sessions` | List all sessions |
|
||||
| `GET` | `/history/{id}` | Get session message history |
|
||||
| `PUT` | `/history/{id}` | Replace full session history |
|
||||
|
||||
@@ -140,15 +140,16 @@
|
||||
});
|
||||
|
||||
// ── Textarea height ──────────────────────────────────────────
|
||||
const HEIGHT_SIZES = [120, 240, 480];
|
||||
const HEIGHT_LABELS = ['S', 'M', 'L'];
|
||||
const HEIGHT_SIZES = [120, 240, 480, 720];
|
||||
const HEIGHT_LABELS = ['S', 'M', 'L', 'XL'];
|
||||
const HEIGHT_TITLES = [
|
||||
'Input size: Compact — click to cycle',
|
||||
'Input size: Medium — click to cycle',
|
||||
'Input size: Large — click to cycle',
|
||||
'Input size: Extra Large — click to cycle',
|
||||
];
|
||||
|
||||
let maxHeight = parseInt(localStorage.getItem('maxHeight') || '120');
|
||||
let maxHeight = parseInt(localStorage.getItem('maxHeight') || '240');
|
||||
const heightCycleBtn = document.getElementById('height-cycle-btn');
|
||||
|
||||
function syncHeight() {
|
||||
|
||||
@@ -115,9 +115,9 @@
|
||||
<div id="ctx-schedule"></div>
|
||||
</div>
|
||||
<div class="ctx-section">
|
||||
<div class="ctx-section-title">Role</div>
|
||||
<div class="ctx-section-title">Model</div>
|
||||
<div class="ctx-row">
|
||||
<button id="backend-toggle" class="ctx-btn" title="Active role — click to cycle">chat</button>
|
||||
<button id="backend-toggle" class="ctx-btn" title="Active model — click to cycle chat role slots">chat</button>
|
||||
</div>
|
||||
<div id="backend-model-hint"></div>
|
||||
</div>
|
||||
@@ -167,24 +167,6 @@
|
||||
<div id="messages"></div>
|
||||
|
||||
<div id="input-area">
|
||||
<!-- Mode select — compact dropdown, opens upward, MRU sorted -->
|
||||
<div id="mode-select">
|
||||
<button id="mode-select-btn" title="Input mode">
|
||||
<span id="mode-icon">💬</span>
|
||||
<span id="mode-label">Chat</span>
|
||||
<span class="mode-arrow">▲</span>
|
||||
</button>
|
||||
<!-- Populated dynamically in MRU order -->
|
||||
<div id="mode-dropdown"></div>
|
||||
<!-- Note visibility sub-toggle — only shown when note mode is active -->
|
||||
<button id="note-vis-btn" title="Toggle note visibility (private / public)">prv</button>
|
||||
<!-- Tools toggle — routes through the orchestrator tool loop when active -->
|
||||
<button id="tools-toggle" title="Tools disabled — click to enable">⚡</button>
|
||||
<!-- Attach file — images (vision) or text/code files -->
|
||||
<button id="attach-btn" title="Attach image or text file">📎</button>
|
||||
<input type="file" id="file-input" style="display:none"
|
||||
accept="image/png,image/jpeg,image/webp,image/gif,text/plain,text/markdown,.md,.txt,.py,.js,.ts,.jsx,.tsx,.json,.yaml,.yml,.toml,.html,.css,.sh,.csv,.xml,.rs,.go,.java,.c,.cpp,.h,.rb,.php,.swift,.kt,.sql">
|
||||
</div>
|
||||
<!-- Attachment preview — shown when a file is pending -->
|
||||
<div id="attachment-row" style="display:none">
|
||||
<div id="attachment-preview">
|
||||
@@ -195,7 +177,26 @@
|
||||
</div>
|
||||
</div>
|
||||
<textarea id="input" rows="1" placeholder="Message…" autofocus></textarea>
|
||||
<div id="send-col">
|
||||
<!-- Compact toolbar: mode, tools, attach | spacer | send/stop -->
|
||||
<div id="input-toolbar">
|
||||
<div id="mode-select">
|
||||
<button id="mode-select-btn" title="Input mode">
|
||||
<span id="mode-icon">💬</span>
|
||||
<span id="mode-label">Chat</span>
|
||||
<span class="mode-arrow">▲</span>
|
||||
</button>
|
||||
<!-- Populated dynamically in MRU order -->
|
||||
<div id="mode-dropdown"></div>
|
||||
</div>
|
||||
<!-- Note visibility sub-toggle — only shown when note mode is active -->
|
||||
<button id="note-vis-btn" title="Toggle note visibility (private / public)">prv</button>
|
||||
<!-- Tools toggle — routes through the orchestrator tool loop when active -->
|
||||
<button id="tools-toggle" title="Tools disabled — click to enable">⚡</button>
|
||||
<!-- Attach file — images (vision) or text/code files -->
|
||||
<button id="attach-btn" title="Attach image or text file">📎</button>
|
||||
<input type="file" id="file-input" style="display:none"
|
||||
accept="image/png,image/jpeg,image/webp,image/gif,text/plain,text/markdown,.md,.txt,.py,.js,.ts,.jsx,.tsx,.json,.yaml,.yml,.toml,.html,.css,.sh,.csv,.xml,.rs,.go,.java,.c,.cpp,.h,.rb,.php,.swift,.kt,.sql">
|
||||
<div style="flex:1"></div>
|
||||
<button id="send">Send</button>
|
||||
<button id="stop"><svg data-lucide="square" width="14" height="14" class="btn-icon"></svg> Stop</button>
|
||||
</div>
|
||||
|
||||
@@ -982,6 +982,42 @@
|
||||
});
|
||||
});
|
||||
|
||||
// ── Model edit: AJAX save (stay on Models tab) ────────────────────────────
|
||||
// Save model edits with fetch() instead of a full form post, so the page does
// not reload and the Models tab stays selected.
document.querySelectorAll('.model-edit-form').forEach(form => {
  form.addEventListener('submit', async e => {
    e.preventDefault();

    // Form ids have the shape "edit-form-<model id>".
    const id = form.id.replace('edit-form-', '');
    const saveBtn = form.querySelector('button[type="submit"]');
    // Disable Save while the request is in flight. Guarded so that a form
    // without a <button type="submit"> can still be saved.
    if (saveBtn) saveBtn.disabled = true;

    try {
      const res = await fetch(`/api/models/${encodeURIComponent(id)}/edit`, {
        method: 'POST',
        body: new FormData(form),
      });

      // A non-JSON body (error page, proxy or login response) makes
      // res.json() reject; report the HTTP status instead of a parse error.
      let data = null;
      try {
        data = await res.json();
      } catch (parseErr) {
        data = null;
      }
      if (!data) {
        showToast(`Save failed (HTTP ${res.status})`, true);
        return;
      }
      if (!data.ok) {
        showToast(data.error || 'Save failed', true);
        return;
      }

      // Update the row header in place with the values echoed by the server.
      const row = document.getElementById('model-' + id);
      if (row) {
        const labelEl = data.label ? row.querySelector('.model-label') : null;
        if (labelEl) labelEl.textContent = data.label;
        const nameEl = data.model_name ? row.querySelector('.model-name') : null;
        if (nameEl) nameEl.textContent = data.model_name;
      }

      // Close the edit panel and reset its toggle button. The button is looked
      // up by dataset value (no selector string built from the id) and may be
      // absent; a missing button must not turn a successful save into an error.
      form.style.display = 'none';
      const editBtn = Array.from(document.querySelectorAll('.model-edit-btn'))
        .find(btn => btn.dataset.id === id);
      if (editBtn) editBtn.textContent = 'Edit';
      showToast('Model saved');
    } catch (err) {
      // Network failure or another unexpected error.
      showToast(err.message, true);
    } finally {
      // Runs on every path, including the early returns above.
      if (saveBtn) saveBtn.disabled = false;
    }
  });
});
|
||||
|
||||
// ── Edit form: fetch from host ────────────────────────────────────────────
|
||||
document.querySelectorAll('.edit-fetch-btn').forEach(btn => {
|
||||
btn.addEventListener('click', async () => {
|
||||
|
||||
@@ -735,35 +735,28 @@
|
||||
.message.note-private .note-content { color: #c9a84c; white-space: pre-wrap; }
|
||||
.message.note-public .note-content { color: #4abfb0; white-space: pre-wrap; }
|
||||
|
||||
/* ── Input area — 3-col: [mode-toggle] [textarea] [send-col] ── */
|
||||
/* ── Input area — column: [attachment?] [textarea] [toolbar] ── */
|
||||
#input-area {
|
||||
padding: 12px 20px;
|
||||
padding: 10px 20px 12px;
|
||||
background: var(--surface);
|
||||
border-top: 1px solid var(--border);
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
gap: 10px;
|
||||
align-items: flex-end;
|
||||
flex-direction: column;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
/* ── Mode select — compact dropdown ─────────────────────────── */
|
||||
/* ── Compact toolbar below the textarea ─────────────────────── */
|
||||
#input-toolbar {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
/* ── Mode select — positioned container for dropdown only ────── */
|
||||
#mode-select {
|
||||
position: relative;
|
||||
flex-shrink: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: stretch;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
/* S: collapse to a single row — mode button + compact tools toggle */
|
||||
#mode-select[data-size="s"] {
|
||||
flex-direction: row;
|
||||
align-items: center;
|
||||
}
|
||||
#mode-select[data-size="s"] #tools-toggle {
|
||||
padding: 3px 7px;
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
#mode-select-btn {
|
||||
@@ -874,8 +867,7 @@
|
||||
#attach-btn:hover { color: rgba(255,255,255,0.6); border-color: rgba(255,255,255,0.25); }
|
||||
|
||||
#attachment-row {
|
||||
padding: 0.3rem 0.5rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
padding: 0.2rem 0;
|
||||
}
|
||||
#attachment-preview {
|
||||
display: inline-flex;
|
||||
@@ -914,7 +906,8 @@
|
||||
#attachment-clear:hover { color: var(--text); }
|
||||
|
||||
#input {
|
||||
flex: 1;
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
background: var(--bg);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
@@ -936,16 +929,7 @@
|
||||
#input.mode-note.public:focus { border-color: rgba(40,170,150,0.85); }
|
||||
#input.mode-otr { border-color: rgba(120,80,160,0.4); background: rgba(120,80,160,0.04); }
|
||||
|
||||
/* Send column — right side, stacked */
|
||||
#send-col {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: stretch;
|
||||
gap: 4px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
/* Send button */
|
||||
/* Send button — sits in #input-toolbar row */
|
||||
#send {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
@@ -955,11 +939,12 @@
|
||||
border: 1px solid var(--user-border);
|
||||
color: var(--text);
|
||||
border-radius: 8px;
|
||||
padding: 10px 14px;
|
||||
padding: 7px 16px;
|
||||
cursor: pointer;
|
||||
font-size: 0.9rem;
|
||||
text-align: center;
|
||||
white-space: nowrap;
|
||||
flex-shrink: 0;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
|
||||
@@ -977,10 +962,11 @@
|
||||
border: 1px solid var(--error-border);
|
||||
color: var(--error-text);
|
||||
border-radius: 8px;
|
||||
padding: 10px 14px;
|
||||
padding: 7px 14px;
|
||||
cursor: pointer;
|
||||
font-size: 0.9rem;
|
||||
text-align: center;
|
||||
flex-shrink: 0;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
|
||||
|
||||
31
cortex/tools/_projects.py
Normal file
31
cortex/tools/_projects.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""Shared project alias registry for Cortex tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
class ProjectDef:
    """Location of one project that the Cortex git/aider tools can target.

    Attributes:
        path: Directory of the project on the host where it lives. May start
            with ``~``.
        ssh_host: Host on which git/aider commands are run over SSH. The
            default empty string means no SSH host is configured.

    NOTE(review): the git and aider tools expand ``~`` with
    ``os.path.expanduser`` inside the Cortex process before using ``path``,
    also for projects that have an ``ssh_host`` — presumably this relies on
    the home directory being identical on both machines; confirm.
    """

    path: str
    ssh_host: str = ""
|
||||
|
||||
|
||||
# Resolved directory three ``.parent`` steps above this file; with this module
# at cortex/tools/_projects.py that is the directory containing ``cortex/``.
_CORTEX_ROOT_STR: str = str(Path(__file__).parent.parent.parent.resolve())

# Alias -> project definition. "cortex" has no ssh_host; the three aether_*
# projects share one SSH host.
PROJECT_ALIASES: dict[str, ProjectDef] = {"cortex": ProjectDef(path=_CORTEX_ROOT_STR)}
PROJECT_ALIASES.update(
    (alias, ProjectDef(path=checkout, ssh_host="scott-wks-main-i7"))
    for alias, checkout in (
        ("aether_api", "~/OSIT_dev/aether_api_fastapi"),
        ("aether_frontend", "~/OSIT_dev/aether_app_sveltekit"),
        ("aether_container", "~/OSIT_dev/aether_container_env"),
    )
)
|
||||
@@ -16,25 +16,16 @@ background=True runs the subprocess asynchronously and returns an agent_id immed
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
from google.genai import types
|
||||
|
||||
import agent_manager
|
||||
from ._projects import PROJECT_ALIASES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CORTEX_DIR = Path(__file__).parent # .../Cortex_and_Inara_dev/cortex/
|
||||
_PROJECT_ROOT = _CORTEX_DIR.parent # .../Cortex_and_Inara_dev/
|
||||
|
||||
# Known project aliases — expand before passing to subprocess
|
||||
_PROJECT_ALIASES: dict[str, str] = {
|
||||
"cortex": str(_PROJECT_ROOT),
|
||||
"aether_api": "~/OSIT_dev/aether_api_fastapi",
|
||||
"aether_frontend": "~/OSIT_dev/aether_app_sveltekit",
|
||||
"aether_container": "~/OSIT_dev/aether_container_env",
|
||||
}
|
||||
|
||||
_MAX_OUTPUT_CHARS = 12_000
|
||||
|
||||
# Maps URL fragments → Aider --api-key provider slug.
|
||||
@@ -192,11 +183,16 @@ async def aider_run(
|
||||
immediately. Use agent_status(agent_id) to check progress; set notify=True to
|
||||
receive a push/Talk notification on completion.
|
||||
"""
|
||||
resolved = _PROJECT_ALIASES.get(project, project)
|
||||
cwd = Path(os.path.expanduser(resolved))
|
||||
proj_def = PROJECT_ALIASES.get(project)
|
||||
if proj_def is not None:
|
||||
cwd = Path(os.path.expanduser(proj_def.path))
|
||||
ssh_host = proj_def.ssh_host
|
||||
else:
|
||||
cwd = Path(os.path.expanduser(project))
|
||||
ssh_host = ""
|
||||
|
||||
if not cwd.is_dir():
|
||||
return f"Error: project directory '{resolved}' does not exist."
|
||||
if not ssh_host and not cwd.is_dir():
|
||||
return f"Error: project directory '{cwd}' does not exist."
|
||||
|
||||
timeout = min(max(int(timeout), 10), 600)
|
||||
|
||||
@@ -232,11 +228,22 @@ async def aider_run(
|
||||
cmd += ["--file", f]
|
||||
|
||||
logger.info(
|
||||
"aider_run: project=%s model=%s host_label=%s auto_commit=%s background=%s task=%.120s",
|
||||
project, model, host_label, auto_commit, background, task,
|
||||
"aider_run: project=%s ssh_host=%s model=%s host_label=%s auto_commit=%s background=%s task=%.120s",
|
||||
project, ssh_host or "local", model, host_label, auto_commit, background, task,
|
||||
)
|
||||
|
||||
async def _run() -> str:
|
||||
if ssh_host:
|
||||
# Run aider natively on the remote host via a login shell so PATH
|
||||
# includes ~/.local/bin where aider is typically installed.
|
||||
inner_cmd = "cd " + shlex.quote(str(cwd)) + " && " + shlex.join(cmd)
|
||||
ssh_cmd = f"bash -l -c {shlex.quote(inner_cmd)}"
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
"ssh", ssh_host, ssh_cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
else:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
cwd=str(cwd),
|
||||
@@ -323,6 +330,8 @@ DECLARATIONS = [
|
||||
"Credentials are resolved automatically from the Cortex model registry — "
|
||||
"OpenRouter, local Open WebUI/Ollama, Anthropic API, and other configured hosts "
|
||||
"are all supported. Use host_label to pick a specific host. "
|
||||
"aether_api, aether_frontend, and aether_container run aider natively on the "
|
||||
"workstation (scott-wks-main-i7) via SSH — aider must be installed there. "
|
||||
"Set background=True for long tasks — returns an agent_id immediately and sends "
|
||||
"a notification when done. ADMIN ONLY. Requires confirmation."
|
||||
),
|
||||
|
||||
@@ -13,26 +13,23 @@ Write operations (admin-only, confirm-required):
|
||||
All tools accept an optional `project` parameter using the same aliases as aider_run:
|
||||
"cortex" (default), "aether_api", "aether_frontend", "aether_container"
|
||||
Or pass an absolute path directly.
|
||||
|
||||
Projects with an ssh_host defined in _projects.py run all git commands on the remote
|
||||
host via SSH, using shlex-quoted commands to handle paths and arguments safely.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
from google.genai import types
|
||||
|
||||
from ._projects import PROJECT_ALIASES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_CORTEX_ROOT: Path = Path(__file__).parent.parent.parent.resolve()
|
||||
|
||||
_PROJECT_ALIASES: dict[str, str] = {
|
||||
"cortex": str(_CORTEX_ROOT),
|
||||
"aether_api": "~/OSIT_dev/aether_api_fastapi",
|
||||
"aether_frontend": "~/OSIT_dev/aether_app_sveltekit",
|
||||
"aether_container": "~/OSIT_dev/aether_container_env",
|
||||
}
|
||||
|
||||
_MAX_OUTPUT = 50_000
|
||||
|
||||
_PROJECT_PARAM = types.Schema(
|
||||
@@ -45,16 +42,29 @@ _PROJECT_PARAM = types.Schema(
|
||||
)
|
||||
|
||||
|
||||
def _resolve_project(project: str) -> Path:
|
||||
"""Resolve a project alias or path string to an absolute Path."""
|
||||
def _resolve_project(project: str) -> tuple[Path, str]:
|
||||
"""Return (path, ssh_host). path may not exist locally when ssh_host is set."""
|
||||
if not project:
|
||||
return _CORTEX_ROOT
|
||||
resolved = _PROJECT_ALIASES.get(project, project)
|
||||
return Path(os.path.expanduser(resolved))
|
||||
d = PROJECT_ALIASES["cortex"]
|
||||
else:
|
||||
d = PROJECT_ALIASES.get(project)
|
||||
if d is None:
|
||||
# Raw path — no SSH routing
|
||||
return Path(os.path.expanduser(project)), ""
|
||||
return Path(os.path.expanduser(d.path)), d.ssh_host
|
||||
|
||||
|
||||
async def _git(*args: str, cwd: Path, timeout: int = 15) -> tuple[int, str]:
|
||||
"""Run a git command in cwd. Returns (returncode, combined output)."""
|
||||
async def _git(*args: str, cwd: Path, ssh_host: str = "", timeout: int = 15) -> tuple[int, str]:
|
||||
"""Run a git command locally or via SSH. Returns (returncode, combined output)."""
|
||||
if ssh_host:
|
||||
# Build a single shell-safe command string for the remote shell
|
||||
remote_cmd = shlex.join(["git", "-C", str(cwd)] + list(args))
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
"ssh", ssh_host, remote_cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
else:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
"git", "-C", str(cwd), *args,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
@@ -80,10 +90,10 @@ def _cap(text: str) -> str:
|
||||
|
||||
async def git_status(project: str = "") -> str:
|
||||
"""Return the working tree status for a project."""
|
||||
cwd = _resolve_project(project)
|
||||
if not cwd.is_dir():
|
||||
cwd, ssh_host = _resolve_project(project)
|
||||
if not ssh_host and not cwd.is_dir():
|
||||
return f"Error: project directory not found: {cwd}"
|
||||
rc, out = await _git("status", cwd=cwd)
|
||||
rc, out = await _git("status", cwd=cwd, ssh_host=ssh_host)
|
||||
if rc != 0:
|
||||
return f"git status failed: {out}"
|
||||
return out or "Working tree clean — nothing to report."
|
||||
@@ -91,8 +101,8 @@ async def git_status(project: str = "") -> str:
|
||||
|
||||
async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: str = "") -> str:
|
||||
"""Return recent commit history for a project."""
|
||||
cwd = _resolve_project(project)
|
||||
if not cwd.is_dir():
|
||||
cwd, ssh_host = _resolve_project(project)
|
||||
if not ssh_host and not cwd.is_dir():
|
||||
return f"Error: project directory not found: {cwd}"
|
||||
args = ["log"]
|
||||
if oneline:
|
||||
@@ -102,7 +112,7 @@ async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: st
|
||||
args += [f"-{max(1, min(n, 200))}"]
|
||||
if path:
|
||||
args += ["--", path]
|
||||
rc, out = await _git(*args, cwd=cwd)
|
||||
rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host)
|
||||
if rc != 0:
|
||||
return f"git log failed: {out}"
|
||||
return _cap(out) or "No commits found."
|
||||
@@ -110,8 +120,8 @@ async def git_log(n: int = 20, path: str = "", oneline: bool = True, project: st
|
||||
|
||||
async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only: bool = False, project: str = "") -> str:
|
||||
"""Show a diff for a project. Defaults to working tree vs HEAD."""
|
||||
cwd = _resolve_project(project)
|
||||
if not cwd.is_dir():
|
||||
cwd, ssh_host = _resolve_project(project)
|
||||
if not ssh_host and not cwd.is_dir():
|
||||
return f"Error: project directory not found: {cwd}"
|
||||
args = ["diff"]
|
||||
if stat_only:
|
||||
@@ -122,7 +132,7 @@ async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only:
|
||||
args += [ref_a]
|
||||
if path:
|
||||
args += ["--", path]
|
||||
rc, out = await _git(*args, cwd=cwd)
|
||||
rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host)
|
||||
# diff exits 1 when differences exist — normal
|
||||
if rc not in (0, 1):
|
||||
return f"git diff failed: {out}"
|
||||
@@ -133,29 +143,27 @@ async def git_diff(ref_a: str = "", ref_b: str = "", path: str = "", stat_only:
|
||||
|
||||
async def git_commit(message: str, project: str = "", files: list[str] | None = None) -> str:
|
||||
"""Stage files and create a commit in a project."""
|
||||
cwd = _resolve_project(project)
|
||||
if not cwd.is_dir():
|
||||
cwd, ssh_host = _resolve_project(project)
|
||||
if not ssh_host and not cwd.is_dir():
|
||||
return f"Error: project directory not found: {cwd}"
|
||||
if not message.strip():
|
||||
return "Error: commit message is required."
|
||||
|
||||
# Stage specified files or all changes
|
||||
if files:
|
||||
for f in files:
|
||||
rc, out = await _git("add", "--", f, cwd=cwd)
|
||||
rc, out = await _git("add", "--", f, cwd=cwd, ssh_host=ssh_host)
|
||||
if rc != 0:
|
||||
return f"git add '{f}' failed: {out}"
|
||||
else:
|
||||
rc, out = await _git("add", "-A", cwd=cwd)
|
||||
rc, out = await _git("add", "-A", cwd=cwd, ssh_host=ssh_host)
|
||||
if rc != 0:
|
||||
return f"git add -A failed: {out}"
|
||||
|
||||
# Check that something is actually staged
|
||||
rc, staged = await _git("diff", "--cached", "--stat", cwd=cwd)
|
||||
rc, staged = await _git("diff", "--cached", "--stat", cwd=cwd, ssh_host=ssh_host)
|
||||
if not staged.strip():
|
||||
return "Nothing staged to commit — working tree already clean."
|
||||
|
||||
rc, out = await _git("commit", "-m", message, cwd=cwd)
|
||||
rc, out = await _git("commit", "-m", message, cwd=cwd, ssh_host=ssh_host)
|
||||
if rc != 0:
|
||||
return f"git commit failed: {out}"
|
||||
return out or "Committed successfully."
|
||||
@@ -163,15 +171,15 @@ async def git_commit(message: str, project: str = "", files: list[str] | None =
|
||||
|
||||
async def git_push(project: str = "", remote: str = "origin", branch: str = "") -> str:
|
||||
"""Push the current branch to a remote."""
|
||||
cwd = _resolve_project(project)
|
||||
if not cwd.is_dir():
|
||||
cwd, ssh_host = _resolve_project(project)
|
||||
if not ssh_host and not cwd.is_dir():
|
||||
return f"Error: project directory not found: {cwd}"
|
||||
|
||||
args = ["push", remote]
|
||||
if branch:
|
||||
args.append(branch)
|
||||
|
||||
rc, out = await _git(*args, cwd=cwd, timeout=30)
|
||||
rc, out = await _git(*args, cwd=cwd, ssh_host=ssh_host, timeout=30)
|
||||
if rc != 0:
|
||||
return f"git push failed: {out}"
|
||||
return out or f"Pushed to {remote} successfully."
|
||||
@@ -186,6 +194,7 @@ DECLARATIONS = [
|
||||
"Show the working tree status for a project: staged changes, unstaged "
|
||||
"modifications, and untracked files. Use before committing to see what "
|
||||
"will be included. Defaults to the Cortex project. "
|
||||
"aether_api, aether_frontend, and aether_container run on the workstation via SSH."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
@@ -198,6 +207,7 @@ DECLARATIONS = [
|
||||
"Show recent commit history for a project. Returns commit hashes, dates, "
|
||||
"and messages. Use after aider_run completes to see what was committed. "
|
||||
"Defaults to the Cortex project. "
|
||||
"aether_api, aether_frontend, and aether_container run on the workstation via SSH."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
@@ -227,6 +237,7 @@ DECLARATIONS = [
|
||||
"With ref_a and ref_b: changes between the two refs. "
|
||||
"Use after aider_run (auto_commit=False) to review changes before committing. "
|
||||
"Defaults to the Cortex project. "
|
||||
"aether_api, aether_frontend, and aether_container run on the workstation via SSH."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
@@ -257,6 +268,7 @@ DECLARATIONS = [
|
||||
"Stage files and create a git commit in a project. "
|
||||
"Use after reviewing changes with git_diff — especially when aider_run ran "
|
||||
"with auto_commit=False. Stages all changes by default (files=None). "
|
||||
"aether_api, aether_frontend, and aether_container commit on the workstation via SSH. "
|
||||
"ADMIN ONLY. Requires confirmation."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
@@ -284,6 +296,7 @@ DECLARATIONS = [
|
||||
description=(
|
||||
"Push the current branch to a remote. "
|
||||
"Use after git_commit or after aider_run commits to share the changes. "
|
||||
"aether_api, aether_frontend, and aether_container push on the workstation via SSH. "
|
||||
"ADMIN ONLY. Requires confirmation."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
|
||||
@@ -1,20 +1,21 @@
|
||||
# Architecture: LLM Backends
|
||||
|
||||
> How Cortex selects and talks to AI models.
|
||||
> Last updated: 2026-05-06
|
||||
> Last updated: 2026-06-18
|
||||
|
||||
---
|
||||
|
||||
## Providers
|
||||
|
||||
Cortex supports four model types, each dispatched differently:
|
||||
Cortex dispatches two model types through `llm_client.py`, each handled differently:
|
||||
|
||||
| Type | Auth | Use |
|
||||
|---|---|---|
|
||||
| `claude_cli` | OAuth token from `~/.claude/.credentials.json` | Chat, persona responses |
|
||||
| `gemini_cli` | Gemini CLI credentials | Chat fallback / explicit selection |
|
||||
| `gemini_api` | API key from registry account or `.env` | Orchestrator tool loop |
|
||||
| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, etc. |
|
||||
| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, any OpenAI-compatible endpoint |
|
||||
| `anthropic_api` | API key in model registry (Anthropic cloud provider) | Claude models via Anthropic SDK |
|
||||
|
||||
The Gemini API (`gemini_api`) is a third type used exclusively by the orchestrator engine —
|
||||
it is not dispatched through `llm_client.py` and is not available for chat/distill roles.
|
||||
|
||||
---
|
||||
|
||||
@@ -22,40 +23,36 @@ Cortex supports four model types, each dispatched differently:
|
||||
|
||||
### Default: Role-Based Routing (Auto)
|
||||
|
||||
When no explicit backend is selected, Cortex routes to the model configured for the
|
||||
request's **role** in the user's model registry. Roles: `chat`, `orchestrator`, `distill`,
|
||||
`coder`, `research` (extensible via `DEFINED_ROLES` in `.env`).
|
||||
All routing goes through the user's model registry. When a request arrives, `complete()` in
|
||||
`llm_client.py` resolves the model for the given role:
|
||||
|
||||
Resolution order for a role:
|
||||
1. User registry: `roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4`
|
||||
2. `.env` role default: `ROLE_CHAT=claude_cli`, `ROLE_DISTILL=claude_cli`, etc.
|
||||
3. Hardcoded last-resort: `chat/distill/coder → claude_cli`, `orchestrator/research → gemini_api`
|
||||
|
||||
### Explicit Override
|
||||
|
||||
The **Role** toggle in the Context & Memory panel cycles through configured role slots for the `chat` role: **Primary → Backup 1 → Backup 2 → auto**.
|
||||
|
||||
- Each slot shows the configured model label
|
||||
- `auto` uses the Primary without forcing a specific backend type
|
||||
- The ⚡ Tools toggle is independent — it routes to the `orchestrator` role regardless of the chat role selection
|
||||
|
||||
**Fallback chain** (automatic, only when no explicit registry entry exists):
|
||||
```
|
||||
claude → gemini
|
||||
gemini → claude
|
||||
local → claude
|
||||
slot specified → resolve that exact slot (primary / backup_1 / backup_2)
|
||||
no slot → get_model_for_role(username, role)
|
||||
no registry entry → RuntimeError: "No model configured for role '...'"
|
||||
```
|
||||
When a model is explicitly configured in the registry, errors surface immediately — no silent fallback.
|
||||
|
||||
Each response shows a model tag (bottom-right of the message bubble) with the model label and host.
|
||||
Roles: `chat`, `orchestrator`, `distill`, `janitor`, `coder`, `research` (extensible via
|
||||
`DEFINED_ROLES` in `.env`).
|
||||
|
||||
There is no implicit fallback to a built-in model. If no model is configured for a role,
|
||||
the request fails with a clear error directing the user to `/settings/models`.
|
||||
|
||||
### Explicit Slot Selection
|
||||
|
||||
The **Model** button (previously labelled "Role") in the Context & Memory panel cycles through the configured slots of the `chat` role:
|
||||
**Primary → Backup 1 → auto**. Each slot resolves the configured model for that position.
|
||||
|
||||
When a model is explicitly configured (via slot or registry entry), errors surface
|
||||
immediately — no silent fallback to another backend.
|
||||
|
||||
---
|
||||
|
||||
## Model Registry — V2 Schema
|
||||
## Model Registry Schema
|
||||
|
||||
Per-user configuration stored in `home/{user}/model_registry.json`.
|
||||
|
||||
Managed at **Settings → Models** (`/settings/models`). Full provider UI coming in Phase 2.
|
||||
Managed at **Settings → Models** (`/settings/models`).
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -64,7 +61,7 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
|
||||
"providers": {
|
||||
"anthropic": {
|
||||
"credentials": [
|
||||
{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}
|
||||
{"id": "key1", "label": "My Anthropic Key", "type": "api_key", "api_key": "sk-ant-..."}
|
||||
]
|
||||
},
|
||||
"google": {
|
||||
@@ -77,6 +74,13 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
|
||||
"hosts": [
|
||||
{
|
||||
"id": "abc123",
|
||||
"label": "OpenRouter",
|
||||
"api_url": "https://openrouter.ai/api/v1",
|
||||
"api_key": "sk-or-...",
|
||||
"host_type": "openai"
|
||||
},
|
||||
{
|
||||
"id": "def456",
|
||||
"label": "Gaming Laptop",
|
||||
"api_url": "http://192.168.x.x:3000",
|
||||
"api_key": "",
|
||||
@@ -87,23 +91,22 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
|
||||
"models": [
|
||||
{
|
||||
"id": "m1",
|
||||
"type": "claude_cli",
|
||||
"label": "Sonnet 4.6 (CLI)",
|
||||
"model_name": "claude-sonnet-4-6",
|
||||
"provider": "anthropic",
|
||||
"credential_id": "cli",
|
||||
"type": "local_openai",
|
||||
"label": "Claude Sonnet 4.6 (OpenRouter)",
|
||||
"model_name": "anthropic/claude-sonnet-4-6",
|
||||
"host_id": "abc123",
|
||||
"context_k": 200,
|
||||
"tags": ["chat", "persona"]
|
||||
},
|
||||
{
|
||||
"id": "m2",
|
||||
"type": "gemini_api",
|
||||
"label": "Gemini 2.5 Flash (OSIT)",
|
||||
"model_name": "gemini-2.5-flash",
|
||||
"provider": "google",
|
||||
"account_id": "a1b2",
|
||||
"context_k": 1000,
|
||||
"tags": ["orchestrator", "research"]
|
||||
"type": "anthropic_api",
|
||||
"label": "Claude Sonnet 4.6 (Direct)",
|
||||
"model_name": "claude-sonnet-4-6",
|
||||
"provider": "anthropic",
|
||||
"credential_id": "key1",
|
||||
"context_k": 200,
|
||||
"tags": ["chat"]
|
||||
},
|
||||
{
|
||||
"id": "m3",
|
||||
@@ -111,7 +114,7 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
|
||||
"label": "Gemma 4 E4B",
|
||||
"model_name": "gemma4:e4b",
|
||||
"provider": "local",
|
||||
"host_id": "abc123",
|
||||
"host_id": "def456",
|
||||
"context_k": 72,
|
||||
"max_rounds": 5,
|
||||
"tools": true,
|
||||
@@ -120,8 +123,8 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
|
||||
],
|
||||
|
||||
"roles": {
|
||||
"chat": {"primary": "m1", "backup_1": "m2", "backup_2": "m3"},
|
||||
"orchestrator": {"primary": "m2", "backup_1": "m3"},
|
||||
"chat": {"primary": "m1", "backup_1": "m2"},
|
||||
"orchestrator": {"primary": "m2"},
|
||||
"distill": {"primary": "m1"}
|
||||
}
|
||||
}
|
||||
@@ -145,52 +148,9 @@ Managed at **Settings → Models** (`/settings/models`). Full provider UI coming
|
||||
Set `api_url` to the base path before `/chat/completions`:
|
||||
- OpenRouter: `https://openrouter.ai/api/v1`
|
||||
|
||||
### Built-in model IDs
|
||||
|
||||
Always resolvable without a user-created registry entry. Used as role defaults.
|
||||
|
||||
| ID | Type | Notes |
|
||||
|---|---|---|
|
||||
| `claude_cli` | `claude_cli` | Model from `DEFAULT_MODEL` in `.env` |
|
||||
| `gemini_cli` | `gemini_cli` | Gemini CLI subprocess |
|
||||
| `gemini_api` | `gemini_api` | Model from `ORCHESTRATOR_MODEL` in `.env`; key from `GEMINI_API_KEY` |
|
||||
|
||||
### V1 → V2 migration
|
||||
|
||||
Automatic on first load. Changes:
|
||||
- Adds `providers` section (Anthropic CLI credential + empty Google accounts)
|
||||
- Migrates `gemini_api_key` from `auth.json` → `providers.google.accounts[0]`
|
||||
- All existing hosts, models, and role assignments are preserved
|
||||
|
||||
---
|
||||
|
||||
## Claude Backend (`_claude()`)
|
||||
|
||||
Runs `claude --print --no-session-persistence --output-format text` as a subprocess.
|
||||
|
||||
- System prompt passed via `--system-prompt`
|
||||
- Conversation history formatted as `<conversation>` block
|
||||
- Token read live from `~/.claude/.credentials.json` on every call — never uses the
|
||||
env var, which goes stale after `claude auth login`
|
||||
- Model override via `--model` flag when `model_name` is set in the registry entry
|
||||
|
||||
Timeout: `TIMEOUT_CLAUDE=60` seconds (`.env`)
|
||||
|
||||
---
|
||||
|
||||
## Gemini CLI Backend (`_gemini()`)
|
||||
|
||||
Runs `gemini --output-format text --extensions "" -p <prompt>` as a subprocess.
|
||||
|
||||
- `--extensions ""` disables all MCP extensions — prevents child processes keeping pipes open
|
||||
- `start_new_session=True` puts the process in its own group for clean `os.killpg` on timeout
|
||||
- Output is cleaned to strip CLI noise (loading messages, retry notices, quota warnings)
|
||||
|
||||
Timeout: `TIMEOUT_GEMINI=120` seconds (`.env`)
|
||||
|
||||
---
|
||||
|
||||
## Local Backend (`_local()`)
|
||||
## Local/OpenAI-Compatible Backend (`_local()`)
|
||||
|
||||
HTTP POST to an OpenAI-compatible endpoint. Model config is resolved via the model registry.
|
||||
|
||||
@@ -199,13 +159,36 @@ HTTP POST to an OpenAI-compatible endpoint. Model config is resolved via the mod
|
||||
# host_type "openai": POST {api_url}/chat/completions
|
||||
```
|
||||
|
||||
System prompt is sent as the first `{"role": "system", "content": "..."}` message.
|
||||
Image attachments are injected into the last user message as `image_url` content blocks.
|
||||
Token usage is recorded when returned by the endpoint.
|
||||
|
||||
Streaming variant: `_local_streaming()` — reads the SSE response line by line and yields tokens via `token_sink`.
|
||||
|
||||
Timeout: `TIMEOUT_LOCAL=300` seconds (`.env`) — local models may need to load from disk.
|
||||
|
||||
---
|
||||
|
||||
## Gemini API (Orchestrator)
|
||||
## Anthropic API Backend (`_anthropic_api()`)
|
||||
|
||||
Used by `orchestrator_engine.py` for the ReAct tool loop. Not used for general chat.
|
||||
Direct call to the Anthropic Messages API via the `anthropic` Python SDK.
|
||||
|
||||
The system prompt is passed as the top-level `system` field. Messages are stripped to `role`/`content` only.
|
||||
Token usage is always recorded from `resp.usage`.
|
||||
|
||||
Streaming variant: `_anthropic_api_streaming()` — uses `client.messages.stream()`, yields
|
||||
tokens via `token_sink`.
|
||||
|
||||
API key comes from the model registry: `providers.anthropic.credentials[n].api_key`.
|
||||
|
||||
Timeout: governed by httpx defaults and the Anthropic SDK's own connection handling.
|
||||
|
||||
---
|
||||
|
||||
## Gemini API (Orchestrator only)
|
||||
|
||||
Used by `orchestrator_engine.py` for the ReAct tool loop. Not dispatched through
|
||||
`llm_client.py` and not available for chat, distill, or other roles.
|
||||
|
||||
API key resolution order:
|
||||
1. `api_key` embedded in the resolved orchestrator model config (V2 registry with `account_id`)
|
||||
@@ -217,9 +200,7 @@ API key resolution order:
|
||||
## Distillation
|
||||
|
||||
Memory distillation uses `role="distill"`. Configure via Model Registry → Role Assignments.
|
||||
|
||||
`.env` override: `ROLE_DISTILL=claude_cli` (default).
|
||||
|
||||
Any `local_openai` or `anthropic_api` model can be assigned to the distill role.
|
||||
|
||||
---
|
||||
|
||||
@@ -232,4 +213,4 @@ Memory distillation uses `role="distill"`. Configure via Model Registry → Role
|
||||
| `cortex/routers/local_llm.py` | Settings UI routes + `/api/models/role` AJAX |
|
||||
| `cortex/routers/chat.py` | `_backend_label()`, `fallback_used` flag |
|
||||
| `cortex/routers/orchestrator.py` | Engine selection, Gemini API key resolution |
|
||||
| `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `PRIMARY_BACKEND` |
|
||||
| `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `TIMEOUT_LOCAL` |
|
||||
|
||||
Reference in New Issue
Block a user