Compare commits
20 Commits
27ca7c7efd
...
1222f806ce
| Author | SHA1 | Date | |
|---|---|---|---|
| | 1222f806ce | | |
| | ed191cf0b4 | | |
| | 44f215c764 | | |
| | d61e39d614 | | |
| | 93a692f3f0 | | |
| | af4d78136a | | |
| | af7d8b40e2 | | |
| | 4159f470d6 | | |
| | e2a61bb78d | | |
| | 80702a21e2 | | |
| | 2b9dd53566 | | |
| | 1cc7988953 | | |
| | 8baab874f1 | | |
| | 962d58d2e2 | | |
| | 3bc6b45f9f | | |
| | ef07596955 | | |
| | 6e56024815 | | |
| | 9f6b162fbd | | |
| | f08b033d6c | | |
| | 45c95d20ba | | |
11
CLAUDE.md
11
CLAUDE.md
@@ -212,9 +212,9 @@ clearly asked for a directory to be unblocked.
|
||||
|
||||
---
|
||||
|
||||
## Current State (2026-04-03)
|
||||
## Current State (2026-04-28)
|
||||
|
||||
Cortex is running and stable. All three primary channels are live:
|
||||
Cortex is running and stable. All channels are live:
|
||||
|
||||
| Channel | Status | Notes |
|
||||
|---|---|---|
|
||||
@@ -222,9 +222,16 @@ Cortex is running and stable. All three primary channels are live:
|
||||
| Nextcloud Talk | ✅ Live | HMAC-signed webhook, async reply |
|
||||
| Google Chat | ✅ Live | Workspace Add-on, `hostAppDataAction` response format |
|
||||
| Local backend | ✅ Live | Open WebUI/Ollama, per-user multi-model config |
|
||||
| Orchestrator | ✅ Live | Gemini API tool loop → Claude response; ⚡ toggle in UI |
|
||||
|
||||
Active users: scott (inara, developer), holly (tina), brian (wintermute)
|
||||
|
||||
**27 orchestrator tools:** web_search, file_read, shell_exec, claude_allow_dir,
|
||||
task_list/create/update/complete, cron_list/add/remove/toggle,
|
||||
reminders_add/list/clear, scratch_read/write/append/clear,
|
||||
ae_journal_list/search/entry_create/entry_update/entry_disable/entry_append/entry_prepend,
|
||||
ae_task_list.
|
||||
|
||||
See `documentation/TODO__Agents.md` for the active task list.
|
||||
See `documentation/ROADMAP.md` for phases and what's next.
|
||||
|
||||
|
||||
@@ -49,14 +49,17 @@ async def complete(
|
||||
messages: list[dict],
|
||||
model: str | None = None,
|
||||
role: str = "chat",
|
||||
slot: str | None = None,
|
||||
max_tokens: int = 2048,
|
||||
) -> tuple[str, str]:
|
||||
"""
|
||||
Returns (response_text, actual_backend_used).
|
||||
|
||||
model: explicit backend override ("claude" | "gemini" | "local") from UI toggle.
|
||||
slot: Phase 3 — specific role slot ("primary" | "backup_1" | "backup_2").
|
||||
Resolves that exact slot, no fallback chain. Takes priority over model.
|
||||
model: legacy backend override ("claude" | "gemini" | "local") from old toggle.
|
||||
None = resolve via model registry for the given role.
|
||||
role: registry role used when model is None (default: "chat").
|
||||
role: registry role used for slot/auto routing (default: "chat").
|
||||
"""
|
||||
import model_registry as _reg
|
||||
from persona import _user
|
||||
@@ -64,21 +67,31 @@ async def complete(
|
||||
username = _user.get()
|
||||
resolved_cfg: dict | None = None
|
||||
|
||||
if model in _EXPLICIT_BACKENDS:
|
||||
# User explicitly selected a backend in the UI
|
||||
if model == "local":
|
||||
resolved_cfg = _reg.get_best_local_model(username, role)
|
||||
if not resolved_cfg:
|
||||
raise RuntimeError("No local model configured — add one at /settings/models")
|
||||
primary = model
|
||||
else:
|
||||
# Role-based routing via model registry
|
||||
resolved = _reg.get_model_for_role(username, role)
|
||||
if resolved:
|
||||
resolved_cfg = resolved
|
||||
primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude")
|
||||
if slot is not None:
|
||||
# Phase 3: explicit slot selection — no fallback within the role
|
||||
resolved_cfg = _reg.get_model_for_slot(username, role, slot)
|
||||
if resolved_cfg:
|
||||
primary = _TYPE_TO_BACKEND.get(resolved_cfg["type"], "claude")
|
||||
else:
|
||||
primary = settings.primary_backend
|
||||
# Slot not configured — fall through to auto routing
|
||||
slot = None
|
||||
|
||||
if slot is None:
|
||||
if model in _EXPLICIT_BACKENDS:
|
||||
# Legacy: explicit backend override from old UI toggle
|
||||
if model == "local":
|
||||
resolved_cfg = _reg.get_best_local_model(username, role)
|
||||
if not resolved_cfg:
|
||||
raise RuntimeError("No local model configured — add one at /settings/models")
|
||||
primary = model
|
||||
else:
|
||||
# Auto: role-based routing via model registry
|
||||
resolved = _reg.get_model_for_role(username, role)
|
||||
if resolved:
|
||||
resolved_cfg = resolved
|
||||
primary = _TYPE_TO_BACKEND.get(resolved["type"], "claude")
|
||||
else:
|
||||
primary = settings.primary_backend
|
||||
|
||||
fallback = _FALLBACK.get(primary, "claude")
|
||||
|
||||
@@ -89,9 +102,7 @@ async def complete(
|
||||
err_str = str(e)
|
||||
if primary == "claude" and any(k in err_str for k in ("401", "authenticate", "expired", "OAuth")):
|
||||
await event_bus.publish({"type": "claude_auth_expired"})
|
||||
# Only fall back when using a default/auto backend.
|
||||
# If the user has explicitly configured a model via the registry,
|
||||
# surface the error so they know something is wrong.
|
||||
# Surface errors when a model is explicitly configured or a specific slot was pinned.
|
||||
if resolved_cfg is not None:
|
||||
logger.error("%s failed (no fallback — model explicitly configured): %s", primary, e)
|
||||
raise
|
||||
@@ -177,9 +188,9 @@ async def _local(system_prompt: str, messages: list[dict], model_cfg: dict | Non
|
||||
model = cfg["model_name"]
|
||||
|
||||
if not api_url:
|
||||
raise RuntimeError("local_api_url not configured — set LOCAL_API_URL in .env or add a host at /settings/local")
|
||||
raise RuntimeError("local_api_url not configured — set LOCAL_API_URL in .env or add a host at /settings/models")
|
||||
if not model:
|
||||
raise RuntimeError("local_model not configured — add a model at /settings/local")
|
||||
raise RuntimeError("local_model not configured — add a model at /settings/models")
|
||||
|
||||
host_type = cfg.get("host_type", "openwebui")
|
||||
# "openwebui" uses Open WebUI/Ollama path layout; "openai" uses standard OpenAI layout
|
||||
|
||||
@@ -1,57 +1,72 @@
|
||||
"""
|
||||
Per-user unified model registry.
|
||||
Per-user unified model registry — V2.
|
||||
|
||||
Stored in: home/{user}/model_registry.json
|
||||
|
||||
Schema:
|
||||
V2 Schema:
|
||||
{
|
||||
"version": 1,
|
||||
"hosts": [{"id", "label", "api_url", "api_key",
|
||||
"host_type": "openwebui" | "openai"}, ...],
|
||||
#
|
||||
# host_type controls the API path layout:
|
||||
# "openwebui" (default) — Open WebUI / Ollama:
|
||||
# chat: POST {url}/api/chat/completions
|
||||
# models: GET {url}/api/models
|
||||
# "openai" — OpenRouter, LiteLLM, Anthropic-compatible, etc.:
|
||||
# chat: POST {url}/chat/completions
|
||||
# models: GET {url}/models
|
||||
# Set api_url to the base path that ends just before /chat/completions,
|
||||
# e.g. https://openrouter.ai/api/v1 for OpenRouter.
|
||||
"version": 2,
|
||||
|
||||
# Per-provider accounts / credentials (user-configured)
|
||||
"providers": {
|
||||
"anthropic": {
|
||||
"credentials": [
|
||||
{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}
|
||||
]
|
||||
},
|
||||
"google": {
|
||||
"accounts": [
|
||||
{"id": "<hex>", "label": "My Google account", "api_key": "AIza..."}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
# Local OpenAI-compatible hosts (unchanged from V1)
|
||||
"hosts": [{"id", "label", "api_url", "api_key", "host_type"}, ...],
|
||||
|
||||
# User-registered model entries (all providers)
|
||||
"models": [
|
||||
{
|
||||
"id": str, # unique within this registry
|
||||
"type": str, # "local_openai" | "claude_cli" | "gemini_cli" | "gemini_api"
|
||||
"label": str, # human-readable display name
|
||||
"model_name": str, # model identifier sent to the API
|
||||
"host_id": str | null, # only for local_openai — references hosts[].id
|
||||
"context_k": int, # context window in thousands of tokens (informational)
|
||||
"tags": [str], # user-defined capability tags
|
||||
"id": str, # unique within this registry
|
||||
"type": str, # see TYPES below
|
||||
"label": str, # human-readable
|
||||
"model_name": str, # identifier sent to the API / CLI
|
||||
"provider": str | null, # "anthropic" | "google" | "local" | null
|
||||
"host_id": str | null, # local_openai only — references hosts[].id
|
||||
"credential_id":str | null, # claude_cli only — references providers.anthropic.credentials
|
||||
"account_id": str | null, # gemini_api only — references providers.google.accounts
|
||||
"context_k": int, # context window in k tokens (informational)
|
||||
"tags": [str], # user-defined capability tags
|
||||
},
|
||||
],
|
||||
|
||||
# Role assignments — any model (any provider) can fill any role
|
||||
"roles": {
|
||||
"<role>": {
|
||||
"primary": "<model_id>" | null,
|
||||
"backup_1": "<model_id>" | null,
|
||||
"backup_2": "<model_id>" | null,
|
||||
"backup_3": "<model_id>" | null,
|
||||
...
|
||||
"backup_4": "<model_id>" | null,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
Built-in model IDs (always resolvable, no registry entry required):
|
||||
"claude_cli" — Claude CLI subprocess (~/.claude/.credentials.json)
|
||||
"gemini_cli" — Gemini CLI subprocess
|
||||
"gemini_api" — Gemini API (google-genai SDK; used by orchestrator engine, not llm_client)
|
||||
Types:
|
||||
"claude_cli" — Claude CLI subprocess (~/.claude/.credentials.json)
|
||||
"gemini_cli" — Gemini CLI subprocess
|
||||
"gemini_api" — Gemini API (google-genai SDK); account_id → api_key from providers.google
|
||||
"local_openai" — OpenAI-compatible endpoint; host_id → api_url/api_key from hosts[]
|
||||
|
||||
Standard roles are defined by settings.defined_roles (default: chat,orchestrator,distill,coder,research).
|
||||
Additional custom roles can be added freely to roles{}.
|
||||
Built-in model IDs (always resolvable without a registry entry):
|
||||
"claude_cli" — resolves to the default Claude CLI model
|
||||
"gemini_cli" — resolves to Gemini CLI
|
||||
"gemini_api" — resolves to Gemini API using GEMINI_API_KEY from .env
|
||||
|
||||
Resolution for get_model_for_role(username, role):
|
||||
1. User registry: roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4
|
||||
2. .env default: ROLE_<ROLE>=<builtin_id> (e.g. ROLE_CHAT=claude_cli)
|
||||
Role resolution for get_model_for_role(username, role):
|
||||
1. User registry: roles[role].primary → backup_1 → ... → backup_4
|
||||
2. .env default: ROLE_<ROLE>=<builtin_id>
|
||||
3. Hardcoded last-resort defaults per role
|
||||
4. claude_cli (absolute fallback)
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -63,11 +78,36 @@ from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Provider model catalogs ───────────────────────────────────────────────────
|
||||
# Server-side defaults. Update here when providers release new models.
|
||||
# Users can add entries via the settings UI (Phase 2).
|
||||
|
||||
ANTHROPIC_CATALOG: list[dict] = [
|
||||
# Latest
|
||||
{"id": "claude-opus-4-7", "label": "Claude Opus 4.7", "context_k": 1000},
|
||||
{"id": "claude-sonnet-4-6", "label": "Claude Sonnet 4.6", "context_k": 1000},
|
||||
{"id": "claude-haiku-4-5-20251001", "label": "Claude Haiku 4.5", "context_k": 200},
|
||||
# Previous versions (still available, not deprecated)
|
||||
{"id": "claude-opus-4-6", "label": "Claude Opus 4.6", "context_k": 1000},
|
||||
{"id": "claude-sonnet-4-5", "label": "Claude Sonnet 4.5", "context_k": 200},
|
||||
]
|
||||
|
||||
GOOGLE_CATALOG: list[dict] = [
|
||||
# Stable / generally available
|
||||
{"id": "gemini-2.5-pro", "label": "Gemini 2.5 Pro", "context_k": 1000},
|
||||
{"id": "gemini-2.5-flash", "label": "Gemini 2.5 Flash", "context_k": 1000},
|
||||
{"id": "gemini-2.5-flash-lite", "label": "Gemini 2.5 Flash-Lite", "context_k": 1000},
|
||||
# Preview
|
||||
{"id": "gemini-3.1-pro-preview", "label": "Gemini 3.1 Pro (preview)", "context_k": 1000},
|
||||
{"id": "gemini-3-flash-preview", "label": "Gemini 3 Flash (preview)", "context_k": 1000},
|
||||
{"id": "gemini-3.1-flash-lite-preview", "label": "Gemini 3.1 Flash-Lite (preview)", "context_k": 1000},
|
||||
]
|
||||
|
||||
|
||||
# ── Built-in model definitions ────────────────────────────────────────────────
|
||||
# These IDs are always resolvable without a registry entry.
|
||||
|
||||
def _builtins() -> dict[str, dict]:
|
||||
"""Return built-in model definitions (lazy so settings are resolved at call time)."""
|
||||
return {
|
||||
"claude_cli": {
|
||||
"id": "claude_cli",
|
||||
@@ -96,7 +136,6 @@ def _builtins() -> dict[str, dict]:
|
||||
}
|
||||
|
||||
|
||||
# Hardcoded last-resort defaults per role (used only if .env is also unset)
|
||||
_ROLE_LAST_RESORT: dict[str, str] = {
|
||||
"chat": "claude_cli",
|
||||
"orchestrator": "gemini_api",
|
||||
@@ -118,14 +157,40 @@ def _local_llm_path(username: str) -> Path:
|
||||
return settings.home_root() / username / "local_llm.json"
|
||||
|
||||
|
||||
def _auth_path(username: str) -> Path:
|
||||
return settings.home_root() / username / "auth.json"
|
||||
|
||||
|
||||
def _empty() -> dict:
|
||||
return {"version": 1, "hosts": [], "models": [], "roles": {}}
|
||||
return {
|
||||
"version": 2,
|
||||
"providers": _default_providers(),
|
||||
"hosts": [],
|
||||
"models": [],
|
||||
"roles": {},
|
||||
}
|
||||
|
||||
|
||||
def _default_providers() -> dict:
|
||||
return {
|
||||
"anthropic": {
|
||||
"credentials": [
|
||||
{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}
|
||||
]
|
||||
},
|
||||
"google": {
|
||||
"accounts": []
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _normalize(data: dict) -> dict:
|
||||
"""Back-fill any missing fields introduced by schema additions."""
|
||||
"""Back-fill missing fields introduced by schema additions."""
|
||||
for h in data.get("hosts", []):
|
||||
h.setdefault("host_type", "openwebui")
|
||||
data.setdefault("providers", _default_providers())
|
||||
data["providers"].setdefault("anthropic", {"credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}]})
|
||||
data["providers"].setdefault("google", {"accounts": []})
|
||||
return data
|
||||
|
||||
|
||||
@@ -135,12 +200,15 @@ def _load(username: str) -> dict:
|
||||
try:
|
||||
data = json.loads(path.read_text())
|
||||
if isinstance(data, dict) and "version" in data:
|
||||
if data["version"] == 1:
|
||||
data = _migrate_v1_to_v2(username, data)
|
||||
_save(username, data)
|
||||
return _normalize(data)
|
||||
except (json.JSONDecodeError, OSError):
|
||||
logger.warning("model_registry.json for %s is unreadable — starting fresh", username)
|
||||
return _empty()
|
||||
|
||||
# No registry yet — try migrating from local_llm.json
|
||||
# No registry — try migrating from local_llm.json
|
||||
legacy = _local_llm_path(username)
|
||||
if legacy.exists():
|
||||
data = _migrate_from_local_llm(username, legacy)
|
||||
@@ -157,8 +225,45 @@ def _save(username: str, data: dict) -> None:
|
||||
|
||||
# ── Migration ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def _migrate_v1_to_v2(username: str, data: dict) -> dict:
|
||||
"""
|
||||
Upgrade a V1 registry to V2.
|
||||
|
||||
Changes:
|
||||
- Adds providers section with default structure
|
||||
- Migrates gemini_api_key from auth.json → first Google account entry
|
||||
- Sets version to 2
|
||||
"""
|
||||
logger.info("Migrating model_registry.json V1 → V2 for %s", username)
|
||||
|
||||
data["version"] = 2
|
||||
if "providers" not in data:
|
||||
data["providers"] = _default_providers()
|
||||
else:
|
||||
data["providers"].setdefault("anthropic", {"credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}]})
|
||||
data["providers"].setdefault("google", {"accounts": []})
|
||||
|
||||
# Pull existing Gemini key from auth.json (stored there in V1) → first account entry
|
||||
accounts = data["providers"]["google"]["accounts"]
|
||||
if not accounts:
|
||||
try:
|
||||
auth = json.loads(_auth_path(username).read_text())
|
||||
existing_key = auth.get("gemini_api_key")
|
||||
if existing_key:
|
||||
accounts.append({
|
||||
"id": secrets.token_hex(4),
|
||||
"label": "Gemini API Key",
|
||||
"api_key": existing_key,
|
||||
})
|
||||
logger.info("Migrated gemini_api_key from auth.json → providers.google.accounts for %s", username)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def _migrate_from_local_llm(username: str, path: Path) -> dict:
|
||||
"""Convert local_llm.json (hosts/models/active_model_id) → model_registry format."""
|
||||
"""Convert local_llm.json → V2 model_registry format."""
|
||||
try:
|
||||
old = json.loads(path.read_text())
|
||||
except Exception:
|
||||
@@ -190,30 +295,27 @@ def _migrate_from_local_llm(username: str, path: Path) -> dict:
|
||||
"type": "local_openai",
|
||||
"label": m.get("label") or m.get("model_name", ""),
|
||||
"model_name": m.get("model_name", ""),
|
||||
"provider": "local",
|
||||
"host_id": m.get("host_id"),
|
||||
"context_k": 0,
|
||||
"tags": [],
|
||||
})
|
||||
|
||||
# Build initial role assignments
|
||||
active_id = old.get("active_model_id")
|
||||
distill_type = settings.distill_backend_mid or None
|
||||
|
||||
roles: dict[str, dict] = {}
|
||||
if active_id and any(m["id"] == active_id for m in data["models"]):
|
||||
roles["chat"] = {"primary": active_id}
|
||||
data["roles"]["chat"] = {"primary": active_id}
|
||||
if settings.distill_backend_mid == "local":
|
||||
data["roles"]["distill"] = {"primary": active_id}
|
||||
|
||||
if distill_type == "local" and active_id:
|
||||
roles["distill"] = {"primary": active_id}
|
||||
|
||||
data["roles"] = roles
|
||||
# Migrate Gemini key from auth.json
|
||||
data = _migrate_v1_to_v2(username, {"version": 1, **data})
|
||||
return data
|
||||
|
||||
|
||||
# ── Model resolution ──────────────────────────────────────────────────────────
|
||||
|
||||
def _resolve_model(registry: dict, model_id: str) -> dict | None:
|
||||
"""Resolve a model_id to its full config dict, or None if not found."""
|
||||
"""Resolve a model_id to its full config dict (credentials merged in), or None."""
|
||||
builtins = _builtins()
|
||||
|
||||
# Built-in IDs take priority over user-defined entries with the same ID
|
||||
@@ -224,7 +326,9 @@ def _resolve_model(registry: dict, model_id: str) -> dict | None:
|
||||
if not model:
|
||||
return None
|
||||
|
||||
if model.get("type") == "local_openai":
|
||||
model_type = model.get("type")
|
||||
|
||||
if model_type == "local_openai":
|
||||
host_id = model.get("host_id")
|
||||
host = next((h for h in registry.get("hosts", []) if h["id"] == host_id), None)
|
||||
if not host:
|
||||
@@ -237,6 +341,19 @@ def _resolve_model(registry: dict, model_id: str) -> dict | None:
|
||||
"host_type": host.get("host_type", "openwebui"),
|
||||
}
|
||||
|
||||
if model_type == "gemini_api":
|
||||
account_id = model.get("account_id")
|
||||
if account_id:
|
||||
accounts = registry.get("providers", {}).get("google", {}).get("accounts", [])
|
||||
account = next((a for a in accounts if a["id"] == account_id), None)
|
||||
if account:
|
||||
return {**model, "api_key": account.get("api_key", "")}
|
||||
logger.warning("model %s references missing account_id %s", model_id, account_id)
|
||||
return dict(model)
|
||||
|
||||
if model_type == "claude_cli":
|
||||
return dict(model)
|
||||
|
||||
return dict(model)
|
||||
|
||||
|
||||
@@ -277,7 +394,6 @@ def get_best_local_model(username: str, role: str = "chat") -> dict | None:
|
||||
"""
|
||||
Return the best available local_openai model for the given role.
|
||||
Used when the user explicitly selects "local" backend in the UI.
|
||||
Tries the role's priority chain first, then any configured local model.
|
||||
"""
|
||||
registry = _load(username)
|
||||
role_cfg = registry.get("roles", {}).get(role, {})
|
||||
@@ -290,7 +406,6 @@ def get_best_local_model(username: str, role: str = "chat") -> dict | None:
|
||||
if resolved and resolved.get("type") == "local_openai":
|
||||
return resolved
|
||||
|
||||
# Fall back to first configured local model
|
||||
for model in registry.get("models", []):
|
||||
if model.get("type") == "local_openai":
|
||||
resolved = _resolve_model(registry, model["id"])
|
||||
@@ -300,15 +415,55 @@ def get_best_local_model(username: str, role: str = "chat") -> dict | None:
|
||||
return None
|
||||
|
||||
|
||||
# ── Read API (for UI and callers) ─────────────────────────────────────────────
|
||||
def get_model_for_slot(username: str, role: str, slot: str) -> dict | None:
|
||||
"""
|
||||
Resolve a single named priority slot from a role without walking the fallback chain.
|
||||
|
||||
Used by Phase 3 explicit slot selection — the user has pinned a specific model;
|
||||
don't silently redirect to another slot if this one is empty or broken.
|
||||
Returns None if the slot is unset or the model can't be resolved.
|
||||
"""
|
||||
if slot not in PRIORITY_KEYS:
|
||||
return None
|
||||
registry = _load(username)
|
||||
model_id = registry.get("roles", {}).get(role, {}).get(slot)
|
||||
if not model_id:
|
||||
return None
|
||||
return _resolve_model(registry, model_id)
|
||||
|
||||
|
||||
def get_google_api_key(username: str, account_id: str | None = None) -> str | None:
|
||||
"""
|
||||
Return the best available Gemini API key for the user.
|
||||
|
||||
If account_id is specified, returns that account's key (or None if not found).
|
||||
Otherwise returns the first configured account key, falling back to the
|
||||
server-level GEMINI_API_KEY from .env.
|
||||
"""
|
||||
registry = _load(username)
|
||||
accounts = registry.get("providers", {}).get("google", {}).get("accounts", [])
|
||||
|
||||
if account_id:
|
||||
account = next((a for a in accounts if a["id"] == account_id), None)
|
||||
return account.get("api_key") if account else None
|
||||
|
||||
# First configured account
|
||||
if accounts:
|
||||
return accounts[0].get("api_key") or None
|
||||
|
||||
# Fall back to .env server key
|
||||
return settings.gemini_api_key or None
|
||||
|
||||
|
||||
# ── Read API ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def get_registry(username: str) -> dict:
|
||||
"""Return the full registry (with built-in models injected for display)."""
|
||||
"""Return the full registry (providers + hosts + models + roles)."""
|
||||
return _load(username)
|
||||
|
||||
|
||||
def get_all_models(username: str) -> list[dict]:
|
||||
"""Return all user-defined models (resolved — hosts merged in)."""
|
||||
"""Return all user-defined models (resolved — credentials/hosts merged in)."""
|
||||
registry = _load(username)
|
||||
out = []
|
||||
for m in registry.get("models", []):
|
||||
@@ -319,24 +474,94 @@ def get_all_models(username: str) -> list[dict]:
|
||||
|
||||
|
||||
def get_defined_roles(username: str) -> dict[str, dict]:
|
||||
"""Return the roles section of the registry, filling gaps with empty dicts."""
|
||||
"""Return the roles section, filling gaps with empty dicts."""
|
||||
registry = _load(username)
|
||||
roles = registry.get("roles", {})
|
||||
result = {}
|
||||
for role in settings.get_defined_roles():
|
||||
result[role] = roles.get(role, {})
|
||||
return result
|
||||
return {role: roles.get(role, {}) for role in settings.get_defined_roles()}
|
||||
|
||||
|
||||
# ── Write API (CRUD) ──────────────────────────────────────────────────────────
|
||||
def get_google_accounts(username: str) -> list[dict]:
|
||||
"""Return Google account entries (api_key masked for display)."""
|
||||
registry = _load(username)
|
||||
accounts = registry.get("providers", {}).get("google", {}).get("accounts", [])
|
||||
return [
|
||||
{
|
||||
"id": a["id"],
|
||||
"label": a.get("label", ""),
|
||||
"hint": (a.get("api_key") or "")[:8] + "…" if a.get("api_key") else "",
|
||||
}
|
||||
for a in accounts
|
||||
]
|
||||
|
||||
|
||||
def get_catalog(provider: str, username: str | None = None) -> list[dict]:
|
||||
"""
|
||||
Return the model catalog for a provider.
|
||||
|
||||
For now returns server defaults. Phase 2 will merge in per-user additions.
|
||||
"""
|
||||
if provider == "anthropic":
|
||||
return list(ANTHROPIC_CATALOG)
|
||||
if provider == "google":
|
||||
return list(GOOGLE_CATALOG)
|
||||
return []
|
||||
|
||||
|
||||
# ── Write API — Google accounts ───────────────────────────────────────────────
|
||||
|
||||
def save_google_account(username: str, account_id: str | None,
|
||||
label: str, api_key: str) -> str:
|
||||
"""Create or update a Google account entry. Returns the account ID."""
|
||||
data = _load(username)
|
||||
accounts = data["providers"]["google"]["accounts"]
|
||||
|
||||
if account_id:
|
||||
for a in accounts:
|
||||
if a["id"] == account_id:
|
||||
a["label"] = label.strip()
|
||||
if api_key.strip():
|
||||
a["api_key"] = api_key.strip()
|
||||
_save(username, data)
|
||||
return account_id
|
||||
|
||||
account_id = secrets.token_hex(4)
|
||||
accounts.append({
|
||||
"id": account_id,
|
||||
"label": label.strip(),
|
||||
"api_key": api_key.strip(),
|
||||
})
|
||||
_save(username, data)
|
||||
return account_id
|
||||
|
||||
|
||||
def remove_google_account(username: str, account_id: str) -> bool:
|
||||
"""Remove a Google account. Clears any model entries that reference it."""
|
||||
data = _load(username)
|
||||
accounts = data["providers"]["google"]["accounts"]
|
||||
before = len(accounts)
|
||||
data["providers"]["google"]["accounts"] = [a for a in accounts if a["id"] != account_id]
|
||||
|
||||
# Clear role assignments for models that referenced this account
|
||||
removed_model_ids = {
|
||||
m["id"] for m in data.get("models", [])
|
||||
if m.get("account_id") == account_id
|
||||
}
|
||||
data["models"] = [m for m in data.get("models", []) if m["id"] not in removed_model_ids]
|
||||
for role_cfg in data.get("roles", {}).values():
|
||||
for key in PRIORITY_KEYS:
|
||||
if role_cfg.get(key) in removed_model_ids:
|
||||
role_cfg[key] = None
|
||||
|
||||
_save(username, data)
|
||||
return len(data["providers"]["google"]["accounts"]) < before
|
||||
|
||||
|
||||
# ── Write API — Hosts ─────────────────────────────────────────────────────────
|
||||
|
||||
def save_host(username: str, host_id: str | None,
|
||||
label: str, api_url: str, api_key: str,
|
||||
host_type: str = "openwebui") -> str:
|
||||
"""Create or update a host. Returns the host ID.
|
||||
|
||||
host_type: "openwebui" (default) or "openai" (OpenRouter, LiteLLM, etc.)
|
||||
"""
|
||||
"""Create or update a host. Returns the host ID."""
|
||||
data = _load(username)
|
||||
host_type = host_type if host_type in ("openwebui", "openai") else "openwebui"
|
||||
|
||||
@@ -350,7 +575,7 @@ def save_host(username: str, host_id: str | None,
|
||||
h["api_key"] = api_key.strip()
|
||||
_save(username, data)
|
||||
return host_id
|
||||
host_id = None # not found — create new
|
||||
host_id = None
|
||||
|
||||
host_id = secrets.token_hex(4)
|
||||
data["hosts"].append({
|
||||
@@ -365,25 +590,26 @@ def save_host(username: str, host_id: str | None,
|
||||
|
||||
|
||||
def remove_host(username: str, host_id: str) -> bool:
|
||||
"""Remove a host and all models that reference it. Returns True if found."""
|
||||
"""Remove a host and all models that reference it."""
|
||||
data = _load(username)
|
||||
before = len(data["hosts"])
|
||||
data["hosts"] = [h for h in data["hosts"] if h["id"] != host_id]
|
||||
data["models"] = [m for m in data["models"] if m.get("host_id") != host_id]
|
||||
# Clear any role assignments that pointed to removed models
|
||||
removed_ids = {m["id"] for m in data["models"] if m.get("host_id") == host_id}
|
||||
removed_model_ids = {m["id"] for m in data["models"] if m.get("host_id") == host_id}
|
||||
data["hosts"] = [h for h in data["hosts"] if h["id"] != host_id]
|
||||
data["models"] = [m for m in data["models"] if m.get("host_id") != host_id]
|
||||
for role_cfg in data.get("roles", {}).values():
|
||||
for key in PRIORITY_KEYS:
|
||||
if role_cfg.get(key) in removed_ids:
|
||||
if role_cfg.get(key) in removed_model_ids:
|
||||
role_cfg[key] = None
|
||||
_save(username, data)
|
||||
return len(data["hosts"]) < before
|
||||
|
||||
|
||||
# ── Write API — Models ────────────────────────────────────────────────────────
|
||||
|
||||
def save_model(username: str, model_id: str | None, host_id: str,
|
||||
label: str, model_name: str, context_k: int = 0,
|
||||
tags: list[str] | None = None) -> str:
|
||||
"""Create or update a model entry. Returns the model ID."""
|
||||
"""Create or update a local_openai model entry. Returns the model ID."""
|
||||
data = _load(username)
|
||||
tags = tags or []
|
||||
|
||||
@@ -405,6 +631,7 @@ def save_model(username: str, model_id: str | None, host_id: str,
|
||||
"type": "local_openai",
|
||||
"label": label.strip() or model_name.strip(),
|
||||
"model_name": model_name.strip(),
|
||||
"provider": "local",
|
||||
"host_id": host_id,
|
||||
"context_k": context_k,
|
||||
"tags": tags,
|
||||
@@ -413,17 +640,61 @@ def save_model(username: str, model_id: str | None, host_id: str,
|
||||
return model_id
|
||||
|
||||
|
||||
def save_cloud_model(username: str, model_id: str | None,
|
||||
provider: str, model_name: str, label: str,
|
||||
account_id: str | None = None,
|
||||
credential_id: str | None = None,
|
||||
context_k: int = 0,
|
||||
tags: list[str] | None = None) -> str:
|
||||
"""
|
||||
Create or update an Anthropic or Google model entry. Returns the model ID.
|
||||
|
||||
provider: "anthropic" | "google"
|
||||
account_id: Google only — references providers.google.accounts[].id
|
||||
credential_id: Anthropic only — e.g. "cli"
|
||||
"""
|
||||
_TYPE = {"google": "gemini_api", "anthropic": "claude_cli"}
|
||||
entry_type = _TYPE.get(provider, "gemini_api")
|
||||
data = _load(username)
|
||||
tags = tags or []
|
||||
|
||||
entry: dict = {
|
||||
"type": entry_type,
|
||||
"label": label.strip() or model_name.strip(),
|
||||
"model_name": model_name.strip(),
|
||||
"provider": provider,
|
||||
"context_k": context_k,
|
||||
"tags": tags,
|
||||
}
|
||||
if account_id:
|
||||
entry["account_id"] = account_id
|
||||
if credential_id:
|
||||
entry["credential_id"] = credential_id
|
||||
|
||||
if model_id:
|
||||
for m in data["models"]:
|
||||
if m["id"] == model_id:
|
||||
m.update(entry)
|
||||
_save(username, data)
|
||||
return model_id
|
||||
model_id = None
|
||||
|
||||
model_id = secrets.token_hex(4)
|
||||
entry["id"] = model_id
|
||||
data["models"].append(entry)
|
||||
_save(username, data)
|
||||
return model_id
|
||||
|
||||
|
||||
def remove_model(username: str, model_id: str) -> bool:
|
||||
"""Remove a model and clear any role assignments pointing to it."""
|
||||
data = _load(username)
|
||||
before = len(data["models"])
|
||||
data["models"] = [m for m in data["models"] if m["id"] != model_id]
|
||||
|
||||
for role_cfg in data.get("roles", {}).values():
|
||||
for key in PRIORITY_KEYS:
|
||||
if role_cfg.get(key) == model_id:
|
||||
role_cfg[key] = None
|
||||
|
||||
_save(username, data)
|
||||
return len(data["models"]) < before
|
||||
|
||||
@@ -434,8 +705,7 @@ def set_role(username: str, role: str, priority: str, model_id: str | None) -> b
|
||||
|
||||
priority must be one of: primary, backup_1, backup_2, backup_3, backup_4
|
||||
model_id None clears the slot.
|
||||
model_id "claude_cli" / "gemini_cli" / "gemini_api" are valid built-in IDs.
|
||||
Returns False if model_id is set but not found.
|
||||
Built-in IDs (claude_cli, gemini_cli, gemini_api) are always valid.
|
||||
"""
|
||||
if priority not in PRIORITY_KEYS:
|
||||
return False
|
||||
@@ -455,10 +725,14 @@ def set_role(username: str, role: str, priority: str, model_id: str | None) -> b
|
||||
return True
|
||||
|
||||
|
||||
def fetch_models_from_host(api_url: str, api_key: str) -> list[str]:
|
||||
# ── Utility ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def fetch_models_from_host(api_url: str, api_key: str,
|
||||
host_type: str = "openwebui") -> list[str]:
|
||||
"""Synchronously fetch the model list from an OpenAI-compatible host."""
|
||||
import httpx
|
||||
url = api_url.rstrip("/") + "/api/models"
|
||||
path = "/api/models" if host_type == "openwebui" else "/models"
|
||||
url = api_url.rstrip("/") + path
|
||||
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
||||
resp = httpx.get(url, headers=headers, timeout=10)
|
||||
resp.raise_for_status()
|
||||
|
||||
@@ -57,6 +57,8 @@ async def run(
|
||||
session_messages: list[dict] | None = None,
|
||||
respond_with_claude: bool = True,
|
||||
gemini_api_key: str | None = None,
|
||||
model_name: str | None = None,
|
||||
response_role: str = "chat",
|
||||
) -> OrchestratorResult:
|
||||
"""
|
||||
Run the full orchestration loop for a task.
|
||||
@@ -96,7 +98,7 @@ async def run(
|
||||
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=settings.orchestrator_model,
|
||||
model=model_name or settings.orchestrator_model,
|
||||
contents=contents,
|
||||
config=types.GenerateContentConfig(
|
||||
tools=TOOL_DECLARATIONS,
|
||||
@@ -175,7 +177,7 @@ async def run(
|
||||
response_text, backend = await complete(
|
||||
system_prompt=system_prompt,
|
||||
messages=messages,
|
||||
model="claude",
|
||||
role=response_role,
|
||||
)
|
||||
else:
|
||||
# Cron/background tasks: return Gemini's summary directly, no Claude call
|
||||
|
||||
@@ -20,7 +20,7 @@ router = APIRouter()
|
||||
|
||||
|
||||
def _backend_label(backend: str, username: str, role: str = "chat") -> str:
|
||||
"""Human-readable label for the model that handled a request."""
|
||||
"""Human-readable label for the model that handled a request (legacy path)."""
|
||||
if backend == "claude":
|
||||
return "Claude"
|
||||
if backend == "gemini":
|
||||
@@ -33,15 +33,24 @@ def _backend_label(backend: str, username: str, role: str = "chat") -> str:
|
||||
return backend.title()
|
||||
|
||||
|
||||
def _role_model_label(username: str, role: str, actual_backend: str) -> str:
    """Pick the display label for the model assigned to ``role``.

    Preference order: the registry entry's ``label``, then its ``model_name``,
    then the generic label ``_backend_label`` produces for ``actual_backend``.
    """
    assigned = model_registry.get_model_for_role(username, role)
    if assigned:
        for field in ("label", "model_name"):
            value = assigned.get(field)
            if value:
                return value
    return _backend_label(actual_backend, username, role)
|
||||
|
||||
|
||||
class ChatRequest(BaseModel):
|
||||
message: str
|
||||
session_id: str | None = None
|
||||
tier: int | None = None
|
||||
model: str | None = None # "claude" or "gemini" to override; None = use primary_backend
|
||||
model: str | None = None # legacy backend override ("claude"|"gemini"|"local")
|
||||
chat_role: str = "chat" # active role: "chat"|"coder"|"research"|"distill" etc.
|
||||
include_long: bool = True
|
||||
include_mid: bool = True
|
||||
include_short: bool = True
|
||||
off_record: bool = False # skip session log (in-memory context preserved)
|
||||
off_record: bool = False # skip session log (in-memory context preserved)
|
||||
user: str = "scott"
|
||||
persona: str = "inara"
|
||||
|
||||
@@ -94,6 +103,7 @@ async def _stream_chat(req: ChatRequest):
|
||||
system_prompt=system_prompt,
|
||||
messages=history,
|
||||
model=req.model,
|
||||
role=req.chat_role,
|
||||
))
|
||||
|
||||
try:
|
||||
@@ -109,7 +119,7 @@ async def _stream_chat(req: ChatRequest):
|
||||
|
||||
try:
|
||||
response_text, actual_backend = task.result()
|
||||
backend_label = _backend_label(actual_backend, user, role="chat")
|
||||
backend_label = _role_model_label(user, req.chat_role, actual_backend)
|
||||
host = platform.node()
|
||||
history.append({
|
||||
"role": "assistant",
|
||||
@@ -164,28 +174,65 @@ _BACKEND_CYCLE = ("claude", "gemini", "local")
|
||||
_BACKEND_FALLBACK = {"claude": "gemini", "gemini": "claude", "local": "claude"}
|
||||
|
||||
|
||||
def _request_user(request: Request) -> str | None:
    """Extract the username from the JWT session cookie.

    Returns None when the cookie is absent, or when reading or decoding it
    raises for any reason.
    """
    try:
        token = request.cookies.get(COOKIE_NAME)
        if not token:
            return None
        return decode_token(token)
    except Exception:
        # Best-effort lookup: any failure is treated as "not signed in".
        # jwt.InvalidTokenError derives from Exception, so listing it
        # alongside Exception added nothing.
        return None
|
||||
|
||||
|
||||
def _local_model_info(request: Request) -> dict | None:
    """Return the best local model ``{label, model_name}`` for the session user, or None.

    The user is resolved once through ``_request_user``; the cookie is not
    decoded a second time here. None is returned when nobody is signed in,
    when the registry reports no local model for the "chat" role, or when the
    registry lookup raises.
    """
    username = _request_user(request)
    if not username:
        return None
    try:
        cfg = model_registry.get_best_local_model(username, "chat")
        if cfg:
            return {"label": cfg.get("label", ""), "model_name": cfg.get("model_name", "")}
    except Exception:
        # Deliberately best-effort: this only feeds a UI hint, so a registry
        # failure degrades to "no local model" instead of failing the request.
        pass
    return None
|
||||
|
||||
|
||||
def _available_roles_for_toggle(username: str) -> list[dict]:
    """List the roles the UI toggle can offer for ``username``.

    A role is included when it has a ``primary`` model assigned and that model
    resolves in the registry; the "orchestrator" role is always left out.
    Each item is ``{role, label, model_label, type}``, in the order produced
    by ``settings.get_defined_roles()``.
    """
    registry = model_registry.get_registry(username)
    assignments = registry.get("roles", {})
    toggles: list[dict] = []
    for name in settings.get_defined_roles():
        if name == "orchestrator":
            continue
        primary = assignments.get(name, {}).get("primary")
        model = model_registry._resolve_model(registry, primary) if primary else None
        if not model:
            continue
        toggles.append({
            "role": name,
            "label": name.title(),
            "model_label": model.get("label") or model.get("model_name") or "",
            "type": model.get("type", ""),
        })
    return toggles
|
||||
|
||||
|
||||
@router.get("/backend")
async def get_backend(request: Request) -> dict:
    """Report backend and role information for the chat UI.

    ``available_roles`` is the role list for the signed-in user (empty when
    nobody is signed in). ``primary``, ``fallback`` and ``local_model`` are the
    legacy fields, each emitted exactly once so the local-model lookup runs a
    single time per request.
    """
    username = _request_user(request)
    available_roles = _available_roles_for_toggle(username) if username else []
    p = settings.primary_backend
    return {
        "available_roles": available_roles,
        # Legacy fields kept for backward compat
        "primary": p,
        "fallback": _BACKEND_FALLBACK.get(p, "claude"),
        "local_model": _local_model_info(request),
    }
|
||||
|
||||
|
||||
|
||||
@@ -1,15 +1,19 @@
|
||||
"""
|
||||
Model Registry settings — hosts, models, and role assignments.
|
||||
Model Registry settings — providers, hosts, models, and role assignments.
|
||||
|
||||
Routes:
|
||||
GET /settings/local → settings page
|
||||
POST /settings/local/host → save/create a host
|
||||
POST /settings/local/host/{id}/remove → remove a host (and its models)
|
||||
POST /settings/local/models/add → add a model entry
|
||||
POST /settings/local/models/{id}/remove → remove a model
|
||||
POST /api/models/role → AJAX: set a role assignment
|
||||
GET /api/local-llm/fetch-models → proxy to host /api/models (JSON)
|
||||
GET /settings/models → settings page (canonical)
|
||||
GET /settings/local → redirect to /settings/models
|
||||
POST /settings/local/host → save/create a local host
|
||||
POST /settings/local/host/{id}/remove → remove a host (and its models)
|
||||
POST /settings/local/google-account → save/create a Google account
|
||||
POST /settings/local/google-account/{id}/remove → remove a Google account
|
||||
POST /settings/local/models/add → add a model (any provider)
|
||||
POST /settings/local/models/{id}/remove → remove a model
|
||||
POST /api/models/role → AJAX: set a role assignment
|
||||
GET /api/local-llm/fetch-models → proxy to the host's model list (/api/models, or /models for OpenAI-compatible hosts) (JSON)
|
||||
"""
|
||||
import json as _json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
@@ -43,21 +47,39 @@ def _get_user(request: Request) -> str | None:
|
||||
# ── Page renderer ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
registry = reg.get_registry(username)
|
||||
hosts = registry.get("hosts", [])
|
||||
models = registry.get("models", [])
|
||||
roles = registry.get("roles", {})
|
||||
builtins = reg._builtins()
|
||||
registry = reg.get_registry(username)
|
||||
hosts = registry.get("hosts", [])
|
||||
models = registry.get("models", [])
|
||||
roles = registry.get("roles", {})
|
||||
builtins = reg._builtins()
|
||||
host_by_id = {h["id"]: h for h in hosts}
|
||||
goog_accts = registry.get("providers", {}).get("google", {}).get("accounts", [])
|
||||
|
||||
host_by_id = {h["id"]: h for h in hosts}
|
||||
# ── Google account rows ───────────────────────────────────────────────────
|
||||
google_account_rows = ""
|
||||
for a in goog_accts:
|
||||
hint = (a.get("api_key") or "")[:10] + "…" if a.get("api_key") else "no key"
|
||||
google_account_rows += f'''
|
||||
<div class="account-row">
|
||||
<div>
|
||||
<span class="account-label">{a.get("label") or "Unnamed"}</span>
|
||||
<span class="account-hint">{hint}</span>
|
||||
</div>
|
||||
<form method="POST" action="/settings/local/google-account/{a["id"]}/remove"
|
||||
onsubmit="return confirm('Remove this Google account?')">
|
||||
<button type="submit" class="btn-link danger">Remove</button>
|
||||
</form>
|
||||
</div>'''
|
||||
if not google_account_rows:
|
||||
google_account_rows = '<p class="empty-note">No accounts configured yet.</p>'
|
||||
|
||||
# ── Host rows ─────────────────────────────────────────────────────────────
|
||||
# ── Local host rows ───────────────────────────────────────────────────────
|
||||
host_rows = ""
|
||||
for h in hosts:
|
||||
key_hint = f"…{h['api_key'][-4:]}" if h.get("api_key") else "not set"
|
||||
ht = h.get("host_type", "openwebui")
|
||||
ow_sel = ' selected' if ht == "openwebui" else ''
|
||||
ai_sel = ' selected' if ht == "openai" else ''
|
||||
key_hint = f"…{h['api_key'][-4:]}" if h.get("api_key") else "not set"
|
||||
ht = h.get("host_type", "openwebui")
|
||||
ow = ' selected' if ht == "openwebui" else ''
|
||||
ai = ' selected' if ht == "openai" else ''
|
||||
host_rows += f'''
|
||||
<div class="host-row">
|
||||
<form method="POST" action="/settings/local/host" class="host-form">
|
||||
@@ -66,7 +88,7 @@ def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
<div class="field">
|
||||
<label>Label</label>
|
||||
<input type="text" name="label" value="{h.get("label","")}"
|
||||
placeholder="Home ML Laptop" autocomplete="off" data-form-type="other">
|
||||
placeholder="Gaming Laptop" autocomplete="off" data-form-type="other">
|
||||
</div>
|
||||
<div class="field" style="flex:2">
|
||||
<label>API URL</label>
|
||||
@@ -79,85 +101,90 @@ def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
<div class="field">
|
||||
<label>API Key</label>
|
||||
<input type="password" name="api_key" placeholder="Leave blank to keep existing"
|
||||
autocomplete="new-password" data-1p-ignore data-lpignore="true" data-form-type="other">
|
||||
autocomplete="new-password" data-1p-ignore data-lpignore="true"
|
||||
data-form-type="other">
|
||||
<p class="key-status">Current: {key_hint}</p>
|
||||
</div>
|
||||
<div class="field" style="flex:0 0 auto">
|
||||
<label>Type</label>
|
||||
<select name="host_type">
|
||||
<option value="openwebui"{ow_sel}>Open WebUI / Ollama</option>
|
||||
<option value="openai"{ai_sel}>OpenAI-compatible (OpenRouter, etc.)</option>
|
||||
<option value="openwebui"{ow}>Open WebUI / Ollama</option>
|
||||
<option value="openai"{ai}>OpenAI-compatible (OpenRouter, etc.)</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button type="submit" class="btn btn-secondary btn-sm">Save host</button>
|
||||
<button type="submit" class="btn btn-secondary btn-sm">Save</button>
|
||||
<button type="button" class="btn btn-secondary btn-sm fetch-btn"
|
||||
data-host-id="{h["id"]}">Fetch models</button>
|
||||
<span class="fetch-status" id="fetch-{h["id"]}"></span>
|
||||
</div>
|
||||
</form>
|
||||
<form method="POST" action="/settings/local/host/{h["id"]}/remove"
|
||||
onsubmit="return confirm('Remove host and all its models?')" style="margin-top:0.5rem">
|
||||
onsubmit="return confirm('Remove host and all its models?')"
|
||||
style="margin-top:0.5rem">
|
||||
<button type="submit" class="btn-link danger">Remove host</button>
|
||||
</form>
|
||||
</div>'''
|
||||
|
||||
if not host_rows:
|
||||
host_rows = '<p class="empty-note">No hosts configured yet. Add one below.</p>'
|
||||
|
||||
# ── Host options for add-model form ───────────────────────────────────────
|
||||
host_options = "".join(
|
||||
f'<option value="{h["id"]}">{h.get("label") or h["api_url"]}</option>'
|
||||
for h in hosts
|
||||
)
|
||||
add_model_hidden = "" if hosts else ' style="display:none"'
|
||||
|
||||
# ── Model rows ────────────────────────────────────────────────────────────
|
||||
# ── Model rows (all providers) ────────────────────────────────────────────
|
||||
_PROVIDER_BADGE = {
|
||||
"claude_cli": ('<span class="pbadge pb-anthropic">Anthropic</span>', "Claude CLI"),
|
||||
"gemini_api": ('<span class="pbadge pb-google">Google</span>', ""),
|
||||
"local_openai": ('<span class="pbadge pb-local">Local</span>', ""),
|
||||
}
|
||||
model_rows = ""
|
||||
for m in models:
|
||||
resolved = reg._resolve_model(registry, m["id"])
|
||||
if not resolved:
|
||||
continue
|
||||
host_name = ""
|
||||
if m.get("type") == "local_openai" and m.get("host_id"):
|
||||
h = host_by_id.get(m["host_id"], {})
|
||||
host_name = h.get("label") or h.get("api_url", "")
|
||||
mtype = m.get("type", "local_openai")
|
||||
badge, default_secondary = _PROVIDER_BADGE.get(mtype, ("", ""))
|
||||
|
||||
ctx_badge = f'<span class="ctx-badge">{m.get("context_k",0)}k ctx</span>' if m.get("context_k") else ""
|
||||
tags_html = " ".join(
|
||||
f'<span class="tag">{t}</span>' for t in (m.get("tags") or [])
|
||||
)
|
||||
host_html = f'<span class="model-host">{host_name}</span>' if host_name else ""
|
||||
if mtype == "local_openai":
|
||||
h = host_by_id.get(m.get("host_id", ""), {})
|
||||
secondary = h.get("label") or h.get("api_url", "")
|
||||
elif mtype == "gemini_api":
|
||||
acct = next((a for a in goog_accts if a["id"] == m.get("account_id")), None)
|
||||
secondary = acct["label"] if acct else ""
|
||||
else:
|
||||
secondary = default_secondary
|
||||
|
||||
ctx = f'<span class="ctx-badge">{m.get("context_k",0)}k</span>' if m.get("context_k") else ""
|
||||
tags = " ".join(f'<span class="tag">{t}</span>' for t in (m.get("tags") or []))
|
||||
sec = f'<span class="model-host">{secondary}</span>' if secondary else ""
|
||||
|
||||
model_rows += f'''
|
||||
<div class="model-row" id="model-{m["id"]}">
|
||||
<div class="model-row">
|
||||
<div class="model-info">
|
||||
<span class="model-label">{m.get("label") or m.get("model_name","")}</span>
|
||||
<div>{badge}<span class="model-label">{m.get("label") or m.get("model_name","")}</span>{ctx}</div>
|
||||
<span class="model-name">{m.get("model_name","")}</span>
|
||||
{host_html}{ctx_badge}
|
||||
<div class="tag-row">{tags_html}</div>
|
||||
</div>
|
||||
<div class="model-actions">
|
||||
<form method="POST" action="/settings/local/models/{m["id"]}/remove"
|
||||
onsubmit="return confirm('Remove this model?')" style="display:inline">
|
||||
<button type="submit" class="row-btn danger">Remove</button>
|
||||
</form>
|
||||
{sec}
|
||||
<div class="tag-row">{tags}</div>
|
||||
</div>
|
||||
<form method="POST" action="/settings/local/models/{m["id"]}/remove"
|
||||
onsubmit="return confirm('Remove this model?')" style="display:inline">
|
||||
<button type="submit" class="row-btn danger">Remove</button>
|
||||
</form>
|
||||
</div>'''
|
||||
|
||||
if not model_rows:
|
||||
model_rows = '<p class="empty-note">No models added yet.</p>'
|
||||
|
||||
# ── Role assignment rows ──────────────────────────────────────────────────
|
||||
# Build option list: (none) + built-ins + user models
|
||||
model_opts = '<option value="">— .env default —</option>\n'
|
||||
model_opts += '<optgroup label="Built-in">\n'
|
||||
for bid, bm in builtins.items():
|
||||
model_opts += f' <option value="{bid}">{bm["label"]}</option>\n'
|
||||
model_opts += '</optgroup>\n'
|
||||
if models:
|
||||
model_opts += '<optgroup label="Local models">\n'
|
||||
model_opts += '<optgroup label="Configured models">\n'
|
||||
for m in models:
|
||||
lbl = m.get("label") or m.get("model_name", m["id"])
|
||||
model_opts += f' <option value="{m["id"]}">{lbl}</option>\n'
|
||||
@@ -166,30 +193,47 @@ def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
role_rows = ""
|
||||
for role in app_settings.get_defined_roles():
|
||||
role_cfg = roles.get(role, {})
|
||||
role_rows += f'<div class="role-row" data-role="{role}"><span class="role-name">{role.title()}</span><div class="role-slots">'
|
||||
for slot in reg.PRIORITY_KEYS[:3]: # primary + backup_1 + backup_2
|
||||
current = role_cfg.get(slot) or ""
|
||||
role_rows += (
|
||||
f'<div class="role-row" data-role="{role}">'
|
||||
f'<span class="role-name">{role.title()}</span>'
|
||||
f'<div class="role-slots">'
|
||||
)
|
||||
for slot in reg.PRIORITY_KEYS[:3]:
|
||||
slot_label = slot.replace("_", " ").title()
|
||||
sel_html = f'<select class="role-select" data-role="{role}" data-slot="{slot}" title="{slot_label}">\n{model_opts}\n</select>'
|
||||
# Pre-select current value via JS (simpler than string-building selected attrs)
|
||||
role_rows += f'<div class="role-slot"><span class="slot-label">{slot_label}</span>{sel_html}</div>'
|
||||
sel = (
|
||||
f'<select class="role-select" data-role="{role}" '
|
||||
f'data-slot="{slot}" title="{slot_label}">\n{model_opts}\n</select>'
|
||||
)
|
||||
role_rows += f'<div class="role-slot"><span class="slot-label">{slot_label}</span>{sel}</div>'
|
||||
role_rows += '</div></div>'
|
||||
|
||||
# JS data for pre-selecting current role values
|
||||
import json as _json
|
||||
role_data_js = _json.dumps({
|
||||
role: {slot: (roles.get(role, {}).get(slot) or "") for slot in reg.PRIORITY_KEYS[:3]}
|
||||
for role in app_settings.get_defined_roles()
|
||||
})
|
||||
|
||||
# ── Catalog data + Google accounts for JS ─────────────────────────────────
|
||||
google_accounts_js = _json.dumps(reg.get_google_accounts(username))
|
||||
google_catalog_js = _json.dumps(reg.get_catalog("google"))
|
||||
anthropic_catalog_js = _json.dumps(reg.get_catalog("anthropic"))
|
||||
has_hosts = "true" if hosts else "false"
|
||||
|
||||
html = (_STATIC / "local_llm.html").read_text()
|
||||
html = html.replace("{{ username }}", username)
|
||||
html = html.replace("{{ host_rows }}", host_rows)
|
||||
html = html.replace("{{ model_rows }}", model_rows)
|
||||
html = html.replace("{{ host_options }}", host_options)
|
||||
html = html.replace("{{ add_model_hidden }}", add_model_hidden)
|
||||
html = html.replace("{{ role_rows }}", role_rows)
|
||||
html = html.replace("{{ role_data_js }}", role_data_js)
|
||||
replacements = {
|
||||
"{{ username }}": username,
|
||||
"{{ google_account_rows }}": google_account_rows,
|
||||
"{{ host_rows }}": host_rows,
|
||||
"{{ model_rows }}": model_rows,
|
||||
"{{ host_options }}": host_options,
|
||||
"{{ role_rows }}": role_rows,
|
||||
"{{ role_data_js }}": role_data_js,
|
||||
"{{ google_accounts_js }}": google_accounts_js,
|
||||
"{{ google_catalog_js }}": google_catalog_js,
|
||||
"{{ anthropic_catalog_js }}": anthropic_catalog_js,
|
||||
"{{ has_hosts }}": has_hosts,
|
||||
}
|
||||
for key, val in replacements.items():
|
||||
html = html.replace(key, val)
|
||||
if success:
|
||||
html = html.replace("<!-- SUCCESS -->", f'<p class="msg success">{success}</p>')
|
||||
if error:
|
||||
@@ -199,14 +243,44 @@ def _render(username: str, success: str = "", error: str = "") -> str:
|
||||
|
||||
# ── Routes ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@router.get("/settings/models", include_in_schema=False)
async def models_page_canonical(request: Request):
    """Serve the model registry settings page at its canonical URL.

    Visitors without a resolvable user are redirected to ``/login`` (302).
    """
    username = _get_user(request)
    if not username:
        return RedirectResponse("/login", status_code=302)
    return HTMLResponse(_render(username))
|
||||
|
||||
|
||||
@router.get("/settings/local", include_in_schema=False)
async def models_page_legacy(request: Request):
    """Permanently redirect (301) the old settings URL to ``/settings/models``."""
    canonical_url = "/settings/models"
    return RedirectResponse(canonical_url, status_code=301)
|
||||
|
||||
|
||||
@router.post("/settings/local/google-account", include_in_schema=False)
async def save_google_account(
    request: Request,
    account_id: str = Form(""),
    label: str = Form(""),
    api_key: str = Form(""),
):
    """Create or update a Google account entry, then re-render the settings page.

    An API key is mandatory only when no ``account_id`` was submitted (i.e. a
    new account is being created). Anonymous requests are redirected to
    ``/login``.
    """
    username = _get_user(request)
    if not username:
        return RedirectResponse("/login", status_code=302)

    creating = not account_id.strip()
    if creating and not api_key.strip():
        return HTMLResponse(_render(username, error="API key is required."))

    reg.save_google_account(username, account_id or None, label, api_key)
    return HTMLResponse(_render(username, success="Google account saved."))
|
||||
|
||||
|
||||
@router.post("/settings/local/google-account/{account_id}/remove", include_in_schema=False)
async def remove_google_account(request: Request, account_id: str):
    """Delete the Google account named in the path and re-render the settings page.

    Anonymous requests are redirected to ``/login``.
    """
    username = _get_user(request)
    if username:
        reg.remove_google_account(username, account_id)
        return HTMLResponse(_render(username, success="Google account removed."))
    return RedirectResponse("/login", status_code=302)
|
||||
|
||||
|
||||
@router.post("/settings/local/host", include_in_schema=False)
|
||||
async def save_host(
|
||||
request: Request,
|
||||
@@ -222,7 +296,6 @@ async def save_host(
|
||||
if not api_url.strip():
|
||||
return HTMLResponse(_render(username, error="API URL is required."))
|
||||
reg.save_host(username, host_id or None, label, api_url, api_key, host_type)
|
||||
logger.info("model registry host saved: %s (%s)", username, host_type)
|
||||
return HTMLResponse(_render(username, success="Host saved."))
|
||||
|
||||
|
||||
@@ -237,22 +310,50 @@ async def remove_host(request: Request, host_id: str):
|
||||
|
||||
@router.post("/settings/local/models/add", include_in_schema=False)
async def add_model(
    request: Request,
    provider: str = Form("local"),
    label: str = Form(""),
    context_k: int = Form(0),
    tags: str = Form(""),
    # local-only fields
    host_id: str = Form(""),
    model_name: str = Form(""),
    # cloud-only fields
    cloud_model_name: str = Form(""),
    account_id: str = Form(""),
    credential_id: str = Form("cli"),
):
    """Add a model entry for any provider and re-render the settings page.

    ``provider`` selects the branch:

    * ``"local"`` requires ``model_name`` and ``host_id`` and stores the entry
      through ``reg.save_model``.
    * ``"google"`` / ``"anthropic"`` require ``cloud_model_name`` (Google also
      requires ``account_id``) and store the entry through
      ``reg.save_cloud_model``.
    * anything else is rejected with an error message.

    ``tags`` is a comma-separated string; empty items are dropped. Anonymous
    requests are redirected to ``/login``.
    """
    # Local import keeps this block self-contained; only used for escaping below.
    from html import escape

    username = _get_user(request)
    if not username:
        return RedirectResponse("/login", status_code=302)

    tag_list = [t.strip() for t in tags.split(",") if t.strip()]

    if provider == "local":
        if not model_name.strip():
            return HTMLResponse(_render(username, error="Model name is required."))
        if not host_id.strip():
            return HTMLResponse(_render(username, error="Select a host."))
        reg.save_model(username, None, host_id, label, model_name, context_k, tag_list)
        display = label or model_name

    elif provider in ("google", "anthropic"):
        if not cloud_model_name.strip():
            return HTMLResponse(_render(username, error="Select a model from the catalog."))
        if provider == "google" and not account_id.strip():
            return HTMLResponse(_render(username, error="Select a Google account."))
        reg.save_cloud_model(
            username, None, provider, cloud_model_name, label,
            account_id=account_id or None,
            credential_id=credential_id or None,
            context_k=context_k, tags=tag_list,
        )
        display = label or cloud_model_name

    else:
        # The provider value comes straight from the form; escape it because the
        # message is spliced into the page markup.
        return HTMLResponse(_render(username, error=f"Unknown provider: {escape(provider)}"))

    logger.info("model added: %s / %s (%s)", username, display, provider)
    # The label / model name is user-supplied and ends up inside HTML as well.
    return HTMLResponse(_render(username, success=f'Model "{escape(display)}" added.'))
|
||||
|
||||
|
||||
@router.post("/settings/local/models/{model_id}/remove", include_in_schema=False)
|
||||
@@ -287,7 +388,7 @@ async def set_role(request: Request) -> JSONResponse:
|
||||
|
||||
ok = reg.set_role(username, role, slot, model_id)
|
||||
if not ok:
|
||||
return JSONResponse({"error": f"Invalid slot or model_id not found"}, status_code=400)
|
||||
return JSONResponse({"error": "Invalid slot or model_id not found"}, status_code=400)
|
||||
|
||||
logger.info("role set: %s %s.%s = %s", username, role, slot, model_id)
|
||||
return JSONResponse({"ok": True})
|
||||
@@ -295,31 +396,24 @@ async def set_role(request: Request) -> JSONResponse:
|
||||
|
||||
@router.get("/api/local-llm/fetch-models")
|
||||
async def fetch_models(request: Request, host_id: str = "") -> JSONResponse:
|
||||
"""Proxy to the host's /api/models endpoint. host_id selects which host."""
|
||||
"""Proxy to the host's models endpoint. host_id selects which host."""
|
||||
username = _get_user(request)
|
||||
if not username:
|
||||
return JSONResponse({"error": "Not authenticated"}, status_code=401)
|
||||
|
||||
registry = reg.get_registry(username)
|
||||
hosts = registry.get("hosts", [])
|
||||
hosts = registry.get("hosts", [])
|
||||
|
||||
if host_id:
|
||||
host = next((h for h in hosts if h["id"] == host_id), None)
|
||||
else:
|
||||
host = hosts[0] if hosts else None
|
||||
host = next((h for h in hosts if h["id"] == host_id), None) if host_id else (hosts[0] if hosts else None)
|
||||
|
||||
# Fall back to .env
|
||||
if host:
|
||||
api_url = host.get("api_url", "")
|
||||
api_key = host.get("api_key", "")
|
||||
api_url, api_key, host_type = host.get("api_url",""), host.get("api_key",""), host.get("host_type","openwebui")
|
||||
else:
|
||||
api_url = app_settings.local_api_url
|
||||
api_key = app_settings.local_api_key
|
||||
api_url, api_key, host_type = app_settings.local_api_url, app_settings.local_api_key, "openwebui"
|
||||
|
||||
if not api_url:
|
||||
return JSONResponse({"error": "No host configured."}, status_code=400)
|
||||
|
||||
host_type = host.get("host_type", "openwebui") if host else "openwebui"
|
||||
models_path = "/models" if host_type == "openai" else "/api/models"
|
||||
url = api_url.rstrip("/") + models_path
|
||||
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
||||
@@ -329,11 +423,10 @@ async def fetch_models(request: Request, host_id: str = "") -> JSONResponse:
|
||||
resp = await client.get(url, headers=headers)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
models = [
|
||||
{"id": m["id"], "name": m.get("name") or m["id"]}
|
||||
for m in data.get("data", [])
|
||||
]
|
||||
models.sort(key=lambda m: m["name"].lower())
|
||||
models = sorted(
|
||||
[{"id": m["id"], "name": m.get("name") or m["id"]} for m in data.get("data", [])],
|
||||
key=lambda m: m["name"].lower(),
|
||||
)
|
||||
return JSONResponse({"models": models})
|
||||
except httpx.HTTPStatusError as e:
|
||||
return JSONResponse({"error": f"Host returned {e.response.status_code}"}, status_code=502)
|
||||
|
||||
@@ -52,6 +52,7 @@ class OrchestrateRequest(BaseModel):
|
||||
include_short: bool = True
|
||||
user: str = "scott"
|
||||
persona: str = "inara"
|
||||
chat_role: str = "chat" # role used for the final response (decoupled from tool-loop model)
|
||||
|
||||
|
||||
class OrchestrateResponse(BaseModel):
|
||||
@@ -171,12 +172,20 @@ async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None:
|
||||
respond_with_final=req.respond_with_claude,
|
||||
)
|
||||
else:
|
||||
# Use the API key embedded in the resolved model config (V2 registry with
|
||||
# account_id), then fall back to the per-user key from auth.json, then .env.
|
||||
gemini_key = (
|
||||
(orch_model.get("api_key") if orch_model else None)
|
||||
or get_user_gemini_key(user)
|
||||
)
|
||||
result = await orchestrator_engine.run(
|
||||
task=req.task,
|
||||
system_prompt=system_prompt,
|
||||
session_messages=session_messages,
|
||||
respond_with_claude=req.respond_with_claude,
|
||||
gemini_api_key=get_user_gemini_key(user),
|
||||
gemini_api_key=gemini_key,
|
||||
model_name=orch_model.get("model_name") if orch_model else None,
|
||||
response_role=req.chat_role,
|
||||
)
|
||||
|
||||
# Save the turn to the session store so it survives a page refresh
|
||||
|
||||
@@ -42,20 +42,11 @@ def _settings_page(username: str, personas: list[str], success: str = "", error:
|
||||
html = (_STATIC / "settings.html").read_text()
|
||||
html = html.replace("{{ username }}", username)
|
||||
|
||||
# Connected Google account
|
||||
# Connected Google account (OAuth sign-in)
|
||||
auth_data = _read_auth(username)
|
||||
google_email = auth_data.get("google_email") or ""
|
||||
html = html.replace("{{ google_email }}", google_email)
|
||||
|
||||
# Gemini API key — show masked hint only, never the full key
|
||||
gemini_key = auth_data.get("gemini_api_key") or ""
|
||||
if gemini_key:
|
||||
hint = f"Saved (…{gemini_key[-4:]})"
|
||||
else:
|
||||
hint = "Using server key"
|
||||
html = html.replace("{{ gemini_key_hint }}", hint)
|
||||
html = html.replace("{{ gemini_key_set }}", "true" if gemini_key else "false")
|
||||
|
||||
persona_items = "\n".join(
|
||||
f'''<li>
|
||||
<a href="/{username}/{p}" class="persona-link">{p}</a>
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and are appended automatically by help.html when present.
|
||||
-->
|
||||
|
||||
*Last updated: 2026-03-27*
|
||||
*Last updated: 2026-04-28*
|
||||
|
||||
---
|
||||
|
||||
@@ -15,21 +15,21 @@
|
||||
| Button | What it does |
|
||||
|---|---|
|
||||
| **Sessions** | Open the sessions panel — list, resume, or start sessions |
|
||||
| **Files** | Open the identity file editor (SOUL, MEMORY, etc.) |
|
||||
| **⚙ N** | Open the Settings panel (N = current context tier) |
|
||||
| **N** (sliders icon) | Open the Context & Memory panel (N = current context tier) |
|
||||
| **☰** | Settings menu — Files, Account, Sign Out |
|
||||
| **?** | Open this help panel |
|
||||
|
||||
The **⚙ Settings** panel contains all configuration options:
|
||||
The **Context & Memory** panel (sliders icon with tier number) contains all configuration options:
|
||||
|
||||
| Section | Controls |
|
||||
|---|---|
|
||||
| **Context Tier** | T1 – T4 context depth |
|
||||
| **Memory Layers** | Toggle Long / Mid / Short memory on/off |
|
||||
| **Distill Memory** | Manually trigger short / mid / long / all distillation |
|
||||
| **Backend** | Active LLM backend — click to toggle claude ↔ gemini |
|
||||
| **Display** | Aa/A+/A− font size cycle · ☾/☀ theme toggle |
|
||||
| **Distill Memory** | Manually trigger Short / Mid / Long / All distillation |
|
||||
| **Role** | Active LLM role — click to cycle through configured role assignments |
|
||||
| **Display** | **Aa** cycles font size · **☾** toggles theme · **S/M/L** cycles input area height · **⌃↵** toggles send shortcut |
|
||||
|
||||
All header settings (theme, font size, tier, memory layers) persist in `localStorage` across page refreshes.
|
||||
All settings persist in `localStorage` across page refreshes.
|
||||
|
||||
---
|
||||
|
||||
@@ -42,21 +42,40 @@ All header settings (theme, font size, tier, memory layers) persist in `localSto
|
||||
- **Copy a response:** Hover over any assistant message → click **copy**.
|
||||
- **New line while typing:** `Enter` or `Shift+Enter` (in `Ctrl+Enter` mode), or `Shift+Enter` only (in Enter mode, where `Enter` sends).
|
||||
|
||||
Each assistant response shows a small **model tag** in the bottom-right corner identifying which model and host responded.
|
||||
|
||||
---
|
||||
|
||||
## Agent Mode
|
||||
## Tools (⚡)
|
||||
|
||||
Click the **Agent** button in the input row to enable Agent mode. The button highlights and Send changes to **Run**.
|
||||
Click the **⚡** button in the input row to turn on Tools mode. When the button is lit (amber), **Send** changes to **Run** and messages are routed through the **orchestrator** instead of directly to the chat model.
|
||||
|
||||
In Agent mode, messages are routed through the **orchestrator** instead of directly to Claude:
|
||||
The orchestrator runs a multi-step tool loop:
|
||||
|
||||
1. **Gemini** runs a tool loop — searches the web, reads files, checks tasks, calls APIs as needed
|
||||
2. **Claude** receives the enriched context and writes the final response
|
||||
3. A `⚡ N tool calls: …` note appears below the response listing what was used
|
||||
1. The **orchestrator model** reasons about the request and calls tools as needed — web search, file reads, task management, shell commands, Aether Journals, and more
|
||||
2. It produces an enriched summary of what it found
|
||||
3. The **responder model** (set by the active Role) receives that context and writes the final user-facing reply
|
||||
4. A `⚡ N tool calls: …` note appears below the response listing what was used
|
||||
|
||||
Agent mode is best for tasks that require research, multi-step reasoning, or tool use (e.g. "search for X", "add a task", "what's on my list?"). Regular chat is faster for conversational turns.
|
||||
The ⚡ toggle is **independent of the Role selector** — you can use any role (chat, coder, research, etc.) with or without tools. The orchestrator model is configured in **Account → Model Registry → Role Assignments → Orchestrator**. By default this is Gemini API.
|
||||
|
||||
Agent mode sessions persist to history exactly like regular chat — they survive page refreshes and appear in the Sessions panel.
|
||||
**Available tools:**
|
||||
|
||||
| Category | Tools |
|
||||
|---|---|
|
||||
| Web | `web_search` |
|
||||
| Files | `file_read` |
|
||||
| Shell | `shell_exec`, `claude_allow_dir` |
|
||||
| Tasks | `task_list`, `task_create`, `task_update`, `task_complete` |
|
||||
| Cron | `cron_list`, `cron_add`, `cron_remove`, `cron_toggle` |
|
||||
| Reminders | `reminders_add`, `reminders_list`, `reminders_clear` |
|
||||
| Scratchpad | `scratch_read`, `scratch_write`, `scratch_append`, `scratch_clear` |
|
||||
| Aether Journals | `ae_journal_list`, `ae_journal_search`, `ae_journal_entry_create`, `ae_journal_entry_update`, `ae_journal_entry_disable`, `ae_journal_entry_append`, `ae_journal_entry_prepend` |
|
||||
| Aether Tasks | `ae_task_list` |
|
||||
|
||||
Tools mode is best for tasks requiring research, multi-step reasoning, or side effects (e.g. "search for X", "add a task", "what's on my list?", "append this to my journal"). Regular chat is faster for conversational turns.
|
||||
|
||||
Orchestrated sessions persist to history exactly like regular chat.
|
||||
|
||||
---
|
||||
|
||||
@@ -84,10 +103,80 @@ Notes are injected into a session without triggering an LLM response.
|
||||
|
||||
## Backends
|
||||
|
||||
- **Claude CLI** and **Gemini CLI** are both available. One is primary, the other is fallback.
|
||||
- Click **⚙** → **Backend** to toggle between `claude` and `gemini` as the primary.
|
||||
- If the primary fails or times out, the fallback is used automatically. A **⚡** notice appears in the chat when this happens.
|
||||
- Timeouts: Claude 60s, Gemini 120s.
|
||||
Three backends are available:
|
||||
|
||||
| Backend | What it is |
|
||||
|---|---|
|
||||
| **Claude** | Anthropic Claude via the Claude CLI (OAuth — no API key needed) |
|
||||
| **Gemini** | Google Gemini via the Gemini CLI |
|
||||
| **Local** | Any OpenAI-compatible endpoint (Open WebUI, Ollama, OpenRouter, etc.) |
|
||||
|
||||
The **Role** toggle in the Context & Memory panel cycles through configured role assignments. Each role maps to a Primary / Backup 1 / Backup 2 model chain set in the Model Registry.
|
||||
|
||||
- The active model label appears below the toggle button
|
||||
- `auto` (default) uses the model assigned to the `chat` role in your Model Registry
|
||||
- Forcing a specific backend overrides the role assignment for that session
|
||||
|
||||
If the active backend fails, a fallback is tried automatically. A **⚡** badge appears on the response when this happens.
|
||||
|
||||
Each response shows a **model tag** (bottom-right of message) with the model label and host, so you always know what responded.
|
||||
|
||||
---
|
||||
|
||||
## Model Registry
|
||||
|
||||
Configure which AI models are available and which handles each task type.
|
||||
|
||||
**Navigate to:** ☰ (top-right menu) → **Account** → scroll to **Model Registry** → **Manage models →**
|
||||
|
||||
---
|
||||
|
||||
### Step 1 — Set up providers and hosts
|
||||
|
||||
Do this before adding models — models need a provider account or local host to attach to.
|
||||
|
||||
**Anthropic (Claude):** Nothing to configure. Claude uses your existing CLI OAuth session. If Claude isn't working, run `claude auth login` in a terminal.
|
||||
|
||||
**Google (Gemini):** Add one entry per API key you want to use:
|
||||
1. Scroll to **Cloud Providers → Google** → click **+ Add Google account**
|
||||
2. Enter a label (e.g. "Work", "Personal") and your API key
|
||||
3. Get a free key at [aistudio.google.com/apikey](https://aistudio.google.com/apikey)
|
||||
|
||||
**Local hosts** (Open WebUI, Ollama, OpenRouter, etc.):
|
||||
1. Scroll to **Local Hosts** → click **+ Add host** to expand the form
|
||||
2. Enter a label, the API URL (e.g. `http://192.168.1.100:3000`), and optional API key
|
||||
3. Set **Type**: Open WebUI / Ollama, or OpenAI-compatible (for OpenRouter, LM Studio, etc.)
|
||||
4. Click **Fetch models** on the saved host card to verify connectivity
|
||||
|
||||
---
|
||||
|
||||
### Step 2 — Add models
|
||||
|
||||
Scroll to **Add Model**. Select the provider tab, fill in the details, click **Add Model**:
|
||||
|
||||
| Tab | What you need |
|
||||
|---|---|
|
||||
| **Local** | Select a host (from Step 1) → enter model name, or use **Fetch from host** to pick from a live list |
|
||||
| **Google** | Select a Gemini model from the catalog → select a Google account (from Step 1) |
|
||||
| **Anthropic** | Select a Claude model from the catalog → uses your CLI session automatically |
|
||||
|
||||
The label and context window size auto-fill from the catalog — edit them if you want. Tags are optional.
|
||||
|
||||
---
|
||||
|
||||
### Step 3 — Assign models to roles
|
||||
|
||||
Scroll to **Role Assignments** at the bottom of the page. Each role has **Primary**, **Backup 1**, and **Backup 2** slots — Primary is tried first, then backups in order. Changes save automatically.
|
||||
|
||||
| Role | Used for |
|
||||
|---|---|
|
||||
| **Chat** | Regular conversation |
|
||||
| **Orchestrator** | Tools (⚡) mode tool loop |
|
||||
| **Distill** | Memory distillation (short / mid / long) |
|
||||
| **Coder** | Code-focused tasks |
|
||||
| **Research** | Long-context research tasks |
|
||||
|
||||
Leave all slots empty to use the server default.
|
||||
|
||||
---
|
||||
|
||||
@@ -95,10 +184,10 @@ Notes are injected into a session without triggering an LLM response.
|
||||
|
||||
Inara is registered as a bot in Nextcloud Talk.
|
||||
|
||||
- Messages sent in enabled Talk conversations are received by Cortex, processed, and replied to by Inara.
|
||||
- The webhook returns `200 OK` immediately; the LLM call and reply happen asynchronously.
|
||||
- Messages sent in enabled Talk conversations are received by Cortex, processed, and replied to.
|
||||
- The webhook returns `200 OK` immediately; the reply happens asynchronously.
|
||||
- Real-time updates stream to the web UI via SSE — you see Talk messages and responses appear live.
|
||||
- To enable the bot in a conversation: open Talk conversation settings → Bots → enable Inara.
|
||||
- To enable the bot in a conversation: open Talk conversation settings → Bots → enable the bot.
|
||||
|
||||
---
|
||||
|
||||
@@ -108,29 +197,27 @@ Inara is available as a bot in Google Chat (One Sky IT Workspace).
|
||||
|
||||
- Send Inara a direct message in Google Chat to start a conversation.
|
||||
- Each DM thread is its own session (`gc_spaces/*` prefix) — history persists across messages.
|
||||
- Responses are synchronous — Google Chat displays Inara's reply directly in the thread.
|
||||
- Responses are synchronous — Google Chat displays the reply directly in the thread.
|
||||
- To add Inara to a space: open the space, add a person/app, search for **Inara**.
|
||||
- Sessions from Google Chat appear as `gc_*` prefixed IDs in the Sessions panel.
|
||||
|
||||
**Technical note:** Cortex uses Google's Workspace Add-on format (`hostAppDataAction`) — the modern API required for all Google Chat apps as of 2025.
|
||||
|
||||
---
|
||||
|
||||
## Files (Identity Editor)
|
||||
|
||||
The **Files** button opens an editor for Inara's identity and memory files:
|
||||
The **Files** button opens an editor for your persona's identity and memory files:
|
||||
|
||||
| File | Purpose |
|
||||
|---|---|
|
||||
| `SOUL.md` | Core personality, values, and voice |
|
||||
| `IDENTITY.md` | Role, capabilities, and context |
|
||||
| `USER.md` | Scott's profile, preferences, and history |
|
||||
| `USER.md` | Your profile, preferences, and history |
|
||||
| `PROTOCOLS.md` | Behavioural rules and communication protocols |
|
||||
| `CONTEXT_TIERS.md` | Defines what gets loaded at each context tier |
|
||||
| `MEMORY_LONG.md` | Permanent curated long-term memory |
|
||||
| `MEMORY_MID.md` | Rolling mid-term digest (LLM-distilled) |
|
||||
| `MEMORY_SHORT.md` | Recent session rollup (auto-aggregated) |
|
||||
| `TASKS.json` | Inara's personal task list (managed via Agent mode) |
|
||||
| `TASKS.json` | Personal task list (managed via Tools (⚡) mode) |
|
||||
| `HELP.md` | This file |
|
||||
|
||||
Toggle **preview** / **edit** to switch between rendered markdown and raw text. **Ctrl+S** saves, **Esc** closes.
|
||||
@@ -154,19 +241,19 @@ Default is T2. Use T1 for small/local models. Use T3–T4 for complex multi-sess
|
||||
|
||||
### Memory Layers
|
||||
|
||||
Three independently toggleable memory files, loaded **Long → Mid → Short** (short sits closest to the conversation turn for better LLM recall):
|
||||
Three independently toggleable memory files, loaded **Long → Mid → Short**:
|
||||
|
||||
| Layer | File | Contents |
|
||||
|---|---|---|
|
||||
| **Long** | `MEMORY_LONG.md` | Permanent facts — origin, key decisions, Scott's profile highlights |
|
||||
| **Long** | `MEMORY_LONG.md` | Permanent facts — origin, key decisions, profile highlights |
|
||||
| **Mid** | `MEMORY_MID.md` | Rolling digest of recent weeks — LLM-distilled from Short |
|
||||
| **Short** | `MEMORY_SHORT.md` | Recent session rollup — auto-aggregated from session log files |
|
||||
| **Short** | `MEMORY_SHORT.md` | Recent session rollup — auto-aggregated from session logs |
|
||||
|
||||
Toggle any layer off to save tokens for a focused conversation where history isn't needed.
|
||||
Toggle any layer off to save tokens for a focused conversation.
|
||||
|
||||
### Memory Distillation (manual)
|
||||
### Memory Distillation
|
||||
|
||||
Distillation builds up the memory layers from raw session logs. Currently **manual** — trigger via the ⚙ panel:
|
||||
Distillation builds up the memory layers from raw session logs. It runs automatically on a schedule; you can also trigger it manually via the **Context & Memory** panel:
|
||||
|
||||
| Button | What it does |
|
||||
|---|---|
|
||||
@@ -175,12 +262,7 @@ Distillation builds up the memory layers from raw session logs. Currently **manu
|
||||
| **long** | LLM integrates `MEMORY_MID.md` → `MEMORY_LONG.md` |
|
||||
| **all** | Runs short → mid → long in sequence |
|
||||
|
||||
**Recommended workflow:**
|
||||
- Run **short** after any productive session to capture it.
|
||||
- Run **mid** weekly to distil short → mid.
|
||||
- Run **long** monthly to absorb mid into permanent memory.
|
||||
|
||||
Token budgets for each layer are set in `.env` (`MEMORY_BUDGET_LONG`, `MEMORY_BUDGET_MID`, `MEMORY_BUDGET_SHORT`).
|
||||
**Recommended workflow:** run **short** after any productive session; **mid** weekly; **long** monthly.
|
||||
|
||||
---
|
||||
|
||||
@@ -192,9 +274,8 @@ Token budgets for each layer are set in `.env` (`MEMORY_BUDGET_LONG`, `MEMORY_BU
|
||||
| `Enter` | Send (when in Enter mode) |
|
||||
| `Shift+Enter` | New line in message input |
|
||||
| `Ctrl+Enter` | Save inline message edit |
|
||||
| `Esc` | Cancel inline edit |
|
||||
| `Esc` | Cancel inline edit / close any open modal |
|
||||
| `Ctrl+S` | Save file (Files modal) |
|
||||
| `Esc` | Close any open modal |
|
||||
|
||||
---
|
||||
|
||||
@@ -219,10 +300,11 @@ For direct access or scripting:
|
||||
| `POST` | `/distill/mid` | Summarize short → MEMORY_MID (LLM) |
|
||||
| `POST` | `/distill/long` | Integrate mid → MEMORY_LONG (LLM) |
|
||||
| `POST` | `/distill/all` | Run all three distillation steps |
|
||||
| `GET` | `/distill/status` | Show scheduler status and next run times |
|
||||
| `GET` | `/distill/status` | Scheduler status and next run times |
|
||||
| `POST` | `/orchestrate` | Submit an agent task — returns `{"job_id": "..."}` |
|
||||
| `GET` | `/orchestrate/{job_id}` | Poll job status and result |
|
||||
| `GET` | `/orchestrate` | List all jobs from current session (in-memory) |
|
||||
| `GET` | `/settings/models` | Model registry UI |
|
||||
| `POST` | `/api/models/role` | Set a role assignment (JSON body) |
|
||||
| `GET` | `/health` | Health check — returns `{"status": "ok"}` |
|
||||
|
||||
Chat request body (`POST /chat`):
|
||||
@@ -230,8 +312,8 @@ Chat request body (`POST /chat`):
|
||||
{
|
||||
"message": "string",
|
||||
"session_id": "string | null",
|
||||
"tier": 1,
|
||||
"model": "claude | gemini | null",
|
||||
"tier": 2,
|
||||
"model": "claude | gemini | local | null",
|
||||
"include_long": true,
|
||||
"include_mid": true,
|
||||
"include_short": true
|
||||
@@ -240,23 +322,4 @@ Chat request body (`POST /chat`):
|
||||
|
||||
---
|
||||
|
||||
## In Progress / Planned
|
||||
|
||||
- **Ollama local model backend** — direct Ollama API support (no CLI wrapper); target host: scott_gaming via WireGuard
|
||||
- **Nextcloud Talk stabilization** — test end-to-end after restarts; complete bot registration docs
|
||||
- **Multi-user support** — per-user identity/memory files; currently single-user (Scott); Holly instance planned
|
||||
|
||||
### Recently Completed
|
||||
|
||||
- ✓ **Google Chat bot** — Workspace Add-on integration; DM and spaces; JWT verification; session persistence
|
||||
- ✓ **Agent mode** — Gemini tool loop + Claude responder, accessible via UI toggle
|
||||
- ✓ **Personal task management** — `task_list`, `task_create`, `task_update`, `task_complete` tools backed by `TASKS.json`
|
||||
- ✓ **Web search fixed** — DDG package updated (`ddgs`); `WebSearch`/`WebFetch` allowed for Claude CLI fallback
|
||||
- ✓ **Session persistence for orchestrator** — agent mode turns now survive page refresh
|
||||
- ✓ **Systemd user service** — Cortex runs as a user service; no sudo required (`systemctl --user restart cortex`)
|
||||
- ✓ **OAuth token warning banner** — amber banner when Claude CLI token is within 24h of expiry
|
||||
|
||||
---
|
||||
|
||||
*Cortex is Scott's personal AI orchestration system. Inara is its primary resident agent.*
|
||||
*Built on FastAPI + Claude CLI + Gemini CLI. Named after Firefly.*
|
||||
*Cortex is a self-hosted personal AI platform. Named after the 'verse-wide communications network in Firefly.*
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
const backendToggle = document.getElementById('backend-toggle');
|
||||
const sessionsBtn = document.getElementById('sessions-btn');
|
||||
const sessionsPanel = document.getElementById('sessions-panel');
|
||||
const heightSel = document.getElementById('height-sel');
|
||||
const enterToggle = document.getElementById('enter-toggle');
|
||||
const stopBtn = document.getElementById('stop');
|
||||
const mode_select_btn_el = document.getElementById('mode-select-btn');
|
||||
@@ -14,6 +13,7 @@
|
||||
const mode_icon_el = document.getElementById('mode-icon');
|
||||
const mode_label_el = document.getElementById('mode-label');
|
||||
const note_vis_btn_el = document.getElementById('note-vis-btn');
|
||||
const tools_toggle_el = document.getElementById('tools-toggle');
|
||||
const settings_btn_el = document.getElementById('settings-btn');
|
||||
const settings_dd_el = document.getElementById('settings-dropdown');
|
||||
const sessionsBackdrop = document.getElementById('sessions-backdrop');
|
||||
@@ -25,6 +25,8 @@
|
||||
if (sessionsPanel) { sessionsPanel.classList.remove('open'); sessionsBackdrop.classList.remove('open'); }
|
||||
const pd = document.getElementById('persona-dropdown');
|
||||
if (pd) pd.classList.remove('open');
|
||||
const cp = document.getElementById('ctx-panel');
|
||||
if (cp) cp.classList.remove('open');
|
||||
}
|
||||
|
||||
// ── Toasts ────────────────────────────────────────────────────
|
||||
@@ -133,40 +135,68 @@
|
||||
});
|
||||
|
||||
// ── Textarea height ──────────────────────────────────────────
|
||||
const HEIGHT_SIZES = [120, 240, 480];
|
||||
const HEIGHT_LABELS = ['S', 'M', 'L'];
|
||||
const HEIGHT_TITLES = [
|
||||
'Input size: Compact — click to cycle',
|
||||
'Input size: Medium — click to cycle',
|
||||
'Input size: Large — click to cycle',
|
||||
];
|
||||
|
||||
let maxHeight = parseInt(localStorage.getItem('maxHeight') || '120');
|
||||
const heightCycleBtn = document.getElementById('height-cycle-btn');
|
||||
|
||||
function syncHeight() {
|
||||
inputEl.style.transition = '';
|
||||
inputEl.style.height = 'auto';
|
||||
inputEl.style.maxHeight = maxHeight + 'px';
|
||||
const sh = inputEl.scrollHeight;
|
||||
inputEl.style.height = Math.min(sh, maxHeight) + 'px';
|
||||
// Minimum height is 1/3 of maxHeight so each setting is visually distinct
|
||||
const minH = Math.round(maxHeight / 3);
|
||||
inputEl.style.height = Math.max(Math.min(sh, maxHeight), minH) + 'px';
|
||||
}
|
||||
|
||||
heightSel.value = String(maxHeight);
|
||||
heightSel.addEventListener('change', () => {
|
||||
maxHeight = parseInt(heightSel.value);
|
||||
localStorage.setItem('maxHeight', maxHeight);
|
||||
syncHeight();
|
||||
});
|
||||
const modeSelectEl = document.getElementById('mode-select');
|
||||
|
||||
function updateHeightUI() {
|
||||
if (!heightCycleBtn) return;
|
||||
const idx = HEIGHT_SIZES.indexOf(maxHeight);
|
||||
const i = idx >= 0 ? idx : 0;
|
||||
heightCycleBtn.textContent = HEIGHT_LABELS[i];
|
||||
heightCycleBtn.title = HEIGHT_TITLES[i];
|
||||
// Drive row/column layout via data attribute
|
||||
if (modeSelectEl) modeSelectEl.dataset.size = HEIGHT_LABELS[i].toLowerCase();
|
||||
}
|
||||
|
||||
if (heightCycleBtn) {
|
||||
heightCycleBtn.addEventListener('click', () => {
|
||||
const idx = HEIGHT_SIZES.indexOf(maxHeight);
|
||||
const nextIdx = (idx + 1) % HEIGHT_SIZES.length;
|
||||
maxHeight = HEIGHT_SIZES[nextIdx];
|
||||
localStorage.setItem('maxHeight', maxHeight);
|
||||
updateHeightUI();
|
||||
syncHeight();
|
||||
});
|
||||
}
|
||||
|
||||
// ── Input mode — dropdown select with MRU ordering ──────────
|
||||
const MODES = {
|
||||
chat: { icon: 'message-circle', label: 'Chat' },
|
||||
note: { icon: 'pencil', label: 'Note' },
|
||||
otr: { icon: 'lock', label: 'OTR' },
|
||||
agent: { icon: 'bot', label: 'Agent' },
|
||||
chat: { icon: 'message-circle', label: 'Chat' },
|
||||
note: { icon: 'pencil', label: 'Note' },
|
||||
otr: { icon: 'lock', label: 'OTR' },
|
||||
};
|
||||
const send_defs = {
|
||||
chat: { icon: 'arrow-up', label: 'Send' },
|
||||
note: { icon: 'pencil', label: 'Note' },
|
||||
otr: { icon: 'arrow-up', label: 'Send' },
|
||||
agent: { icon: 'zap', label: 'Run' },
|
||||
chat: { icon: 'arrow-up', label: 'Send' },
|
||||
note: { icon: 'pencil', label: 'Note' },
|
||||
otr: { icon: 'arrow-up', label: 'Send' },
|
||||
};
|
||||
|
||||
let current_mode = localStorage.getItem('current_mode') || 'chat';
|
||||
if (!(current_mode in MODES)) current_mode = 'chat'; // migrate stored 'agent'
|
||||
let note_public = false;
|
||||
// MRU list — most recent first; used to sort dropdown options
|
||||
let mode_mru = JSON.parse(localStorage.getItem('mode_mru') || '["chat","note","otr","agent"]');
|
||||
let mode_mru = JSON.parse(localStorage.getItem('mode_mru') || '["chat","note","otr"]');
|
||||
mode_mru = mode_mru.filter(m => m in MODES); // strip stale 'agent' entries
|
||||
|
||||
function push_mru(mode) {
|
||||
mode_mru = [mode, ...mode_mru.filter(m => m !== mode)];
|
||||
@@ -219,7 +249,7 @@
|
||||
});
|
||||
|
||||
function update_mode_ui() {
|
||||
const m = MODES[current_mode];
|
||||
const m = MODES[current_mode] || MODES.chat;
|
||||
const sd = send_defs[current_mode] || send_defs.chat;
|
||||
|
||||
// Update trigger button
|
||||
@@ -235,13 +265,15 @@
|
||||
note_vis_btn_el.classList.toggle('pub', note_public);
|
||||
|
||||
// Textarea mode classes
|
||||
inputEl.classList.toggle('mode-note', current_mode === 'note');
|
||||
inputEl.classList.toggle('public', current_mode === 'note' && note_public);
|
||||
inputEl.classList.toggle('mode-otr', current_mode === 'otr');
|
||||
inputEl.classList.toggle('mode-agent', current_mode === 'agent');
|
||||
inputEl.classList.toggle('mode-note', current_mode === 'note');
|
||||
inputEl.classList.toggle('public', current_mode === 'note' && note_public);
|
||||
inputEl.classList.toggle('mode-otr', current_mode === 'otr');
|
||||
|
||||
// Send button label + icon
|
||||
sendBtn.innerHTML = icon_html(sd.icon) + ' ' + sd.label;
|
||||
// Send button label + icon (tools active → "Run", otherwise per-mode)
|
||||
const effectiveSd = toolsEnabled && current_mode !== 'note'
|
||||
? { icon: 'zap', label: 'Run' }
|
||||
: sd;
|
||||
sendBtn.innerHTML = icon_html(effectiveSd.icon) + ' ' + effectiveSd.label;
|
||||
|
||||
render_icons();
|
||||
updateInputPlaceholder();
|
||||
@@ -252,12 +284,14 @@
|
||||
inputEl.placeholder = note_public
|
||||
? 'Public note — LLM sees this next turn…'
|
||||
: 'Private note — only you see this…';
|
||||
} else if (current_mode === 'agent') {
|
||||
inputEl.placeholder = ctrlEnterMode
|
||||
? `Task for ${personaLabel}… (Gemini tool loop — Ctrl+Enter to run)`
|
||||
: `Task for ${personaLabel}… (Gemini tool loop)`;
|
||||
} else if (current_mode === 'otr') {
|
||||
inputEl.placeholder = 'Off the record — not logged or distilled…';
|
||||
inputEl.placeholder = toolsEnabled
|
||||
? `Task for ${personaLabel}… ⚡ tools + off the record`
|
||||
: 'Off the record — not logged or distilled…';
|
||||
} else if (toolsEnabled) {
|
||||
inputEl.placeholder = ctrlEnterMode
|
||||
? `Task for ${personaLabel}… ⚡ tools (Ctrl+Enter to run)`
|
||||
: `Task for ${personaLabel}… ⚡ tools`;
|
||||
} else {
|
||||
inputEl.placeholder = ctrlEnterMode
|
||||
? `Message ${personaLabel}… (Ctrl+Enter to send)`
|
||||
@@ -272,6 +306,26 @@
|
||||
update_mode_ui();
|
||||
});
|
||||
|
||||
// ── Tools toggle ─────────────────────────────────────────────
|
||||
// When on: submit goes to POST /orchestrate (orchestrator tool loop, Gemini by default → active role responds).
|
||||
// When off: submit goes to POST /chat (direct to active role, no tools).
|
||||
let toolsEnabled = localStorage.getItem('tools-enabled') === 'true';
|
||||
|
||||
function updateToolsToggleUI() {
|
||||
tools_toggle_el.classList.toggle('local-on', toolsEnabled);
|
||||
tools_toggle_el.title = toolsEnabled
|
||||
? '⚡ Tools enabled — click to disable'
|
||||
: 'Tools disabled — click to enable';
|
||||
update_mode_ui();
|
||||
}
|
||||
|
||||
tools_toggle_el.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
toolsEnabled = !toolsEnabled;
|
||||
localStorage.setItem('tools-enabled', toolsEnabled);
|
||||
updateToolsToggleUI();
|
||||
});
|
||||
|
||||
// ── Settings dropdown ─────────────────────────────────────────
|
||||
settings_btn_el.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
@@ -339,59 +393,48 @@
|
||||
document.addEventListener('click', () => personaDropEl.classList.remove('open'));
|
||||
}
|
||||
|
||||
// ── Backend toggle ───────────────────────────────────────────
|
||||
// null = "auto" — uses role-based routing from model registry
|
||||
// 'claude' / 'gemini' / 'local' = explicit override
|
||||
// ── Role toggle ──────────────────────────────────────────────
|
||||
// Cycles through roles that have a primary model assigned (excluding orchestrator).
|
||||
// Sends chat_role ("chat"|"coder"|"research"|...) in chat requests.
|
||||
// Falls back to "chat" when no roles are configured in the registry.
|
||||
|
||||
// On load only fetch local_model hint; don't override primaryBackend default (null)
|
||||
fetch('/backend').then(r => r.json()).then(d => {
|
||||
if (backendModelHint && d.local_model) {
|
||||
// Pre-fill hint in case user is already in local mode
|
||||
backendModelHint.textContent = d.local_model.label || d.local_model.model_name;
|
||||
}
|
||||
});
|
||||
|
||||
const BACKEND_CYCLE = [null, 'claude', 'gemini', 'local'];
|
||||
const BACKEND_CLASS = { claude: '', gemini: 'mem-on', local: 'local-on' };
|
||||
const TYPE_CLASS = { claude_cli: '', gemini_api: 'mem-on', gemini_cli: 'mem-on', local_openai: 'local-on' };
|
||||
const backendModelHint = document.getElementById('backend-model-hint');
|
||||
|
||||
function setBackendUI(backend, localModel) {
|
||||
primaryBackend = backend;
|
||||
backendToggle.textContent = backend === null ? 'auto' : backend;
|
||||
const extra = backend === null ? '' : (BACKEND_CLASS[backend] || '');
|
||||
backendToggle.className = 'ctx-btn' + (extra ? ' ' + extra : '');
|
||||
let availableRoles = []; // [{role, label, model_label, type}] from /backend
|
||||
let roleIdx = 0;
|
||||
|
||||
function activeRole() {
|
||||
return availableRoles.length > 0 ? availableRoles[roleIdx] : null;
|
||||
}
|
||||
|
||||
function setRoleToggleUI(entry) {
|
||||
if (!entry) {
|
||||
backendToggle.textContent = 'chat';
|
||||
backendToggle.className = 'ctx-btn';
|
||||
} else {
|
||||
backendToggle.textContent = entry.label;
|
||||
backendToggle.className = 'ctx-btn ' + (TYPE_CLASS[entry.type] || '');
|
||||
}
|
||||
if (backendModelHint) {
|
||||
if (backend === 'local' && localModel) {
|
||||
backendModelHint.textContent = localModel.label || localModel.model_name;
|
||||
backendModelHint.style.display = '';
|
||||
} else {
|
||||
backendModelHint.textContent = '';
|
||||
backendModelHint.style.display = 'none';
|
||||
}
|
||||
const hint = entry?.model_label || '';
|
||||
backendModelHint.textContent = hint;
|
||||
backendModelHint.style.display = hint ? '' : 'none';
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize to auto mode
|
||||
setBackendUI(null, null);
|
||||
fetch('/backend').then(r => r.json()).then(d => {
|
||||
availableRoles = d.available_roles || [];
|
||||
roleIdx = 0;
|
||||
setRoleToggleUI(availableRoles[0] || null);
|
||||
});
|
||||
|
||||
backendToggle.addEventListener('click', async () => {
|
||||
const idx = BACKEND_CYCLE.indexOf(primaryBackend);
|
||||
const next = BACKEND_CYCLE[(idx + 1) % BACKEND_CYCLE.length];
|
||||
if (next === null) {
|
||||
// Auto: role-based routing — no server call needed
|
||||
setBackendUI(null, null);
|
||||
addMessage('system', 'Backend: auto (role-based routing)');
|
||||
} else {
|
||||
const res = await fetch('/backend', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ primary: next }),
|
||||
});
|
||||
const d = await res.json();
|
||||
setBackendUI(next, d.local_model);
|
||||
addMessage('system', `Backend: ${next} (fallback: ${d.fallback})`);
|
||||
}
|
||||
backendToggle.addEventListener('click', () => {
|
||||
if (availableRoles.length <= 1) return;
|
||||
roleIdx = (roleIdx + 1) % availableRoles.length;
|
||||
const entry = availableRoles[roleIdx];
|
||||
setRoleToggleUI(entry);
|
||||
addMessage('system', `Role: ${entry.label} · ${entry.model_label}`);
|
||||
});
|
||||
|
||||
// ── Sessions panel ───────────────────────────────────────────
|
||||
@@ -1066,7 +1109,7 @@
|
||||
include_mid: memMid,
|
||||
include_short: memShort,
|
||||
off_record: current_mode === 'otr',
|
||||
model: primaryBackend,
|
||||
chat_role: activeRole()?.role || 'chat',
|
||||
user: CORTEX_USER,
|
||||
persona: CORTEX_PERSONA,
|
||||
};
|
||||
@@ -1109,6 +1152,7 @@
|
||||
include_long: memLong,
|
||||
include_mid: memMid,
|
||||
include_short: memShort,
|
||||
chat_role: activeRole()?.role || 'chat',
|
||||
user: CORTEX_USER,
|
||||
persona: CORTEX_PERSONA,
|
||||
}),
|
||||
@@ -1182,7 +1226,7 @@
|
||||
|
||||
function dispatchSend() {
|
||||
if (current_mode === 'note') addNote();
|
||||
else if (current_mode === 'agent') sendOrchestrate();
|
||||
else if (toolsEnabled) sendOrchestrate();
|
||||
else sendMessage();
|
||||
}
|
||||
|
||||
@@ -1589,8 +1633,12 @@
|
||||
|
||||
ctxOpenBtn.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
ctxPanel.classList.toggle('open');
|
||||
if (ctxPanel.classList.contains('open')) loadSchedule();
|
||||
const isOpen = ctxPanel.classList.contains('open');
|
||||
closeAllPanels();
|
||||
if (!isOpen) {
|
||||
ctxPanel.classList.add('open');
|
||||
loadSchedule();
|
||||
}
|
||||
});
|
||||
|
||||
document.addEventListener('click', (e) => {
|
||||
@@ -1647,6 +1695,8 @@
|
||||
|
||||
updateTierUI();
|
||||
updateMemUI();
|
||||
updateHeightUI();
|
||||
updateToolsToggleUI();
|
||||
update_mode_ui();
|
||||
|
||||
// ── Init ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -90,32 +90,28 @@
|
||||
<div class="ctx-section">
|
||||
<div class="ctx-section-title">Distill Memory</div>
|
||||
<div class="ctx-row">
|
||||
<button class="ctx-btn" id="distill-short-btn" title="Roll session logs → MEMORY_SHORT (no LLM)">short</button>
|
||||
<button class="ctx-btn" id="distill-mid-btn" title="Summarize short → MEMORY_MID (LLM)">mid</button>
|
||||
<button class="ctx-btn" id="distill-long-btn" title="Integrate mid → MEMORY_LONG (LLM)">long</button>
|
||||
<button class="ctx-btn" id="distill-all-btn" title="Run all three steps in sequence">all</button>
|
||||
<button class="ctx-btn" id="distill-short-btn" title="Roll today's sessions → MEMORY_SHORT.md (fast, no LLM)">Short</button>
|
||||
<button class="ctx-btn" id="distill-mid-btn" title="Summarize SHORT → MID memory (uses LLM)">Mid</button>
|
||||
<button class="ctx-btn" id="distill-long-btn" title="Integrate MID → LONG memory (uses LLM)">Long</button>
|
||||
<button class="ctx-btn" id="distill-all-btn" title="Run Short → Mid → Long in sequence">All</button>
|
||||
</div>
|
||||
<div id="ctx-distill-status"></div>
|
||||
<div id="ctx-schedule"></div>
|
||||
</div>
|
||||
<div class="ctx-section">
|
||||
<div class="ctx-section-title">Backend</div>
|
||||
<div class="ctx-section-title">Role</div>
|
||||
<div class="ctx-row">
|
||||
<button id="backend-toggle" class="ctx-btn" title="Click to switch primary backend">claude</button>
|
||||
<button id="backend-toggle" class="ctx-btn" title="Active role — click to cycle">chat</button>
|
||||
</div>
|
||||
<div id="backend-model-hint"></div>
|
||||
</div>
|
||||
<div class="ctx-section">
|
||||
<div class="ctx-section-title">Display</div>
|
||||
<div class="ctx-row">
|
||||
<button id="font-size-btn" class="ctx-btn" title="Cycle font size: normal → large → small">Aa</button>
|
||||
<button id="theme-btn" class="ctx-btn" title="Toggle light/dark mode">☾</button>
|
||||
<select id="height-sel" class="ctx-btn" title="Max input height" style="cursor:pointer">
|
||||
<option value="120">5 lines</option>
|
||||
<option value="240">10 lines</option>
|
||||
<option value="480">20 lines</option>
|
||||
</select>
|
||||
<button id="enter-toggle" class="ctx-btn" title="Toggle send shortcut">⌃↵</button>
|
||||
<button id="font-size-btn" class="ctx-btn" title="Cycle font size: Normal → Large → Small">Aa</button>
|
||||
<button id="theme-btn" class="ctx-btn" title="Toggle light / dark theme">☾</button>
|
||||
<button id="height-cycle-btn" class="ctx-btn" title="Input size: Compact — click to cycle">S</button>
|
||||
<button id="enter-toggle" class="ctx-btn" title="Toggle send shortcut: Ctrl+Enter ↔ Enter">⌃↵</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -176,6 +172,8 @@
|
||||
<div id="mode-dropdown"></div>
|
||||
<!-- Note visibility sub-toggle — only shown when note mode is active -->
|
||||
<button id="note-vis-btn" title="Toggle note visibility (private / public)">prv</button>
|
||||
<!-- Tools toggle — routes through the orchestrator tool loop when active -->
|
||||
<button id="tools-toggle" title="Tools disabled — click to enable">⚡</button>
|
||||
</div>
|
||||
<textarea id="input" rows="1" placeholder="Message…" autofocus></textarea>
|
||||
<div id="send-col">
|
||||
|
||||
@@ -9,30 +9,21 @@
|
||||
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@100..900&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
body {
|
||||
min-height: 100vh;
|
||||
background: #0f1117;
|
||||
font-family: 'Inter', system-ui, -apple-system, sans-serif;
|
||||
font-weight: 450;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
color: #e2e8f0;
|
||||
min-height: 100vh; background: #0f1117;
|
||||
font-family: 'Inter', system-ui, sans-serif; font-weight: 450;
|
||||
-webkit-font-smoothing: antialiased; color: #e2e8f0;
|
||||
padding: 2rem 1.5rem 4rem;
|
||||
}
|
||||
|
||||
.page { max-width: 700px; margin: 0 auto; }
|
||||
|
||||
/* ── Nav ── */
|
||||
.page-nav {
|
||||
display: flex; align-items: center; gap: 0.25rem;
|
||||
margin-bottom: 1.75rem; flex-wrap: wrap;
|
||||
}
|
||||
/* Nav */
|
||||
.page-nav { display: flex; align-items: center; gap: 0.25rem; margin-bottom: 1.75rem; flex-wrap: wrap; }
|
||||
.nav-link {
|
||||
display: inline-flex; align-items: center;
|
||||
padding: 0.3rem 0.6rem; border-radius: 6px;
|
||||
font-size: 0.8rem; font-weight: 500; color: #64748b;
|
||||
text-decoration: none; transition: color 0.15s, background 0.15s;
|
||||
white-space: nowrap;
|
||||
text-decoration: none; transition: color 0.15s, background 0.15s; white-space: nowrap;
|
||||
}
|
||||
.nav-link:hover { color: #cbd5e1; background: rgba(255,255,255,0.05); }
|
||||
.nav-link.active { color: #a78bfa; }
|
||||
@@ -40,12 +31,12 @@
|
||||
.nav-link.nav-logout { color: #475569; }
|
||||
.nav-link.nav-logout:hover { color: #94a3b8; background: none; }
|
||||
|
||||
/* ── Page header ── */
|
||||
/* Page header */
|
||||
.page-header { margin-bottom: 2rem; padding-bottom: 1rem; border-bottom: 1px solid #2d3148; }
|
||||
.page-header h1 { font-size: 1.4rem; font-weight: 700; color: #a78bfa; }
|
||||
.page-header p { font-size: 0.82rem; color: #94a3b8; margin-top: 0.25rem; }
|
||||
|
||||
/* ── Section cards ── */
|
||||
/* Section cards */
|
||||
.section {
|
||||
background: #1a1d27; border: 1px solid #2d3148;
|
||||
border-radius: 10px; padding: 1.5rem; margin-bottom: 1.25rem;
|
||||
@@ -56,16 +47,37 @@
|
||||
margin-bottom: 1.1rem; padding-bottom: 0.5rem;
|
||||
border-bottom: 1px solid #2d3148;
|
||||
}
|
||||
.section-note {
|
||||
font-size: 0.8rem; color: #64748b; margin-bottom: 1rem; line-height: 1.5;
|
||||
}
|
||||
.section-note { font-size: 0.8rem; color: #64748b; margin-bottom: 1rem; line-height: 1.5; }
|
||||
|
||||
/* ── Form elements ── */
|
||||
.field { margin-bottom: 0.9rem; }
|
||||
label {
|
||||
display: block; font-size: 0.78rem; font-weight: 500;
|
||||
color: #94a3b8; margin-bottom: 0.35rem;
|
||||
/* Provider sub-sections */
|
||||
.provider-block { margin-bottom: 1.25rem; }
|
||||
.provider-block:last-child { margin-bottom: 0; }
|
||||
.provider-header {
|
||||
display: flex; align-items: center; gap: 0.6rem;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
.provider-icon {
|
||||
width: 1.6rem; height: 1.6rem; border-radius: 5px;
|
||||
display: flex; align-items: center; justify-content: center;
|
||||
font-size: 0.75rem; font-weight: 700; flex-shrink: 0;
|
||||
}
|
||||
.pi-anthropic { background: #1e1b4b; color: #818cf8; }
|
||||
.pi-google { background: #042f2e; color: #34d399; }
|
||||
.provider-title { font-size: 0.9rem; font-weight: 600; color: #e2e8f0; }
|
||||
.provider-subtitle { font-size: 0.78rem; color: #64748b; }
|
||||
|
||||
/* Account rows */
|
||||
.account-row {
|
||||
display: flex; align-items: center; justify-content: space-between;
|
||||
padding: 0.6rem 0.9rem; background: #0f1117;
|
||||
border: 1px solid #2d3148; border-radius: 8px; margin-bottom: 0.5rem;
|
||||
}
|
||||
.account-label { font-size: 0.88rem; font-weight: 600; color: #e2e8f0; }
|
||||
.account-hint { font-size: 0.73rem; color: #475569; margin-left: 0.5rem; font-family: monospace; }
|
||||
|
||||
/* Form elements */
|
||||
.field { margin-bottom: 0.9rem; }
|
||||
label { display: block; font-size: 0.78rem; font-weight: 500; color: #94a3b8; margin-bottom: 0.35rem; }
|
||||
input[type="text"], input[type="password"], input[type="url"],
|
||||
input[type="number"], select {
|
||||
width: 100%; padding: 0.6rem 0.8rem;
|
||||
@@ -76,13 +88,11 @@
|
||||
input:focus, select:focus { border-color: #7c3aed; }
|
||||
select { cursor: pointer; }
|
||||
input[type="number"] { width: 6rem; }
|
||||
|
||||
.field-row { display: flex; gap: 0.75rem; }
|
||||
.field-row .field { flex: 1; margin-bottom: 0; }
|
||||
|
||||
.key-status { font-size: 0.75rem; color: #94a3b8; margin-top: 0.35rem; }
|
||||
|
||||
/* ── Buttons ── */
|
||||
/* Buttons */
|
||||
.btn {
|
||||
padding: 0.6rem 1.1rem; border: none; border-radius: 6px;
|
||||
font-size: 0.88rem; font-weight: 600; cursor: pointer;
|
||||
@@ -90,23 +100,41 @@
|
||||
}
|
||||
.btn-primary { background: #7c3aed; color: #fff; }
|
||||
.btn-primary:hover { background: #6d28d9; }
|
||||
.btn-secondary {
|
||||
background: #1a1d27; color: #94a3b8;
|
||||
border: 1px solid #2d3148;
|
||||
}
|
||||
.btn-secondary { background: #1a1d27; color: #94a3b8; border: 1px solid #2d3148; }
|
||||
.btn-secondary:hover { border-color: #94a3b8; color: #e2e8f0; }
|
||||
.btn-sm { padding: 0.35rem 0.7rem; font-size: 0.8rem; font-weight: 500; }
|
||||
.btn-row { display: flex; gap: 0.6rem; align-items: center; margin-top: 0.75rem; flex-wrap: wrap; }
|
||||
.btn-link {
|
||||
background: none; border: none; cursor: pointer; font-family: inherit;
|
||||
font-size: 0.78rem; color: #64748b; padding: 0; text-decoration: underline;
|
||||
text-underline-offset: 2px;
|
||||
font-size: 0.78rem; color: #64748b; padding: 0;
|
||||
text-decoration: underline; text-underline-offset: 2px;
|
||||
}
|
||||
.btn-link:hover { color: #94a3b8; }
|
||||
.btn-link.danger { color: #7f1d1d; }
|
||||
.btn-link.danger:hover { color: #f87171; }
|
||||
|
||||
/* ── Host rows ── */
|
||||
/* Provider tabs */
|
||||
.ptabs { display: flex; gap: 0; margin-bottom: 1.1rem; border-bottom: 1px solid #2d3148; }
|
||||
.ptab {
|
||||
padding: 0.45rem 0.9rem; font-size: 0.82rem; font-weight: 500;
|
||||
background: none; border: none; cursor: pointer; color: #64748b;
|
||||
border-bottom: 2px solid transparent; margin-bottom: -1px;
|
||||
transition: color 0.15s, border-color 0.15s; font-family: inherit;
|
||||
}
|
||||
.ptab.active { color: #a78bfa; border-bottom-color: #a78bfa; }
|
||||
.ptab:hover:not(.active) { color: #cbd5e1; }
|
||||
|
||||
/* Provider badges on model rows */
|
||||
.pbadge {
|
||||
display: inline-block; padding: 0.1rem 0.35rem; border-radius: 3px;
|
||||
font-size: 0.65rem; font-weight: 600; margin-right: 0.35rem;
|
||||
vertical-align: middle;
|
||||
}
|
||||
.pb-anthropic { background: #1e1b4b; color: #818cf8; }
|
||||
.pb-google { background: #042f2e; color: #34d399; }
|
||||
.pb-local { background: #1e293b; color: #64748b; }
|
||||
|
||||
/* Host & model rows */
|
||||
.host-row {
|
||||
background: #0f1117; border: 1px solid #2d3148; border-radius: 8px;
|
||||
padding: 1rem; margin-bottom: 0.75rem;
|
||||
@@ -116,7 +144,6 @@
|
||||
.fetch-status.ok { color: #4ade80; }
|
||||
.fetch-status.err { color: #f87171; }
|
||||
|
||||
/* ── Model rows ── */
|
||||
.model-row {
|
||||
display: flex; align-items: flex-start; justify-content: space-between;
|
||||
gap: 0.75rem; padding: 0.75rem 0.9rem;
|
||||
@@ -128,39 +155,31 @@
|
||||
.model-name { font-size: 0.75rem; color: #64748b; font-family: monospace; word-break: break-all; }
|
||||
.model-host { font-size: 0.72rem; color: #475569; }
|
||||
.ctx-badge {
|
||||
display: inline-block; margin-left: 0.4rem;
|
||||
padding: 0.1rem 0.35rem; border-radius: 3px;
|
||||
background: #1e293b; color: #64748b;
|
||||
font-size: 0.67rem; font-weight: 600;
|
||||
display: inline-block; margin-left: 0.35rem;
|
||||
padding: 0.1rem 0.3rem; border-radius: 3px;
|
||||
background: #1e293b; color: #64748b; font-size: 0.65rem; font-weight: 600;
|
||||
vertical-align: middle;
|
||||
}
|
||||
.tag-row { display: flex; flex-wrap: wrap; gap: 0.3rem; margin-top: 0.2rem; }
|
||||
.tag {
|
||||
padding: 0.1rem 0.4rem; border-radius: 3px;
|
||||
background: #1e1b4b; color: #818cf8;
|
||||
font-size: 0.68rem; font-weight: 500;
|
||||
}
|
||||
.model-actions { display: flex; gap: 0.4rem; flex-shrink: 0; }
|
||||
.tag { padding: 0.1rem 0.4rem; border-radius: 3px; background: #1e1b4b; color: #818cf8; font-size: 0.68rem; font-weight: 500; }
|
||||
.row-btn {
|
||||
padding: 0.3rem 0.65rem; border-radius: 5px; font-size: 0.78rem;
|
||||
font-weight: 500; cursor: pointer; font-family: inherit;
|
||||
border: 1px solid #2d3148; background: #1a1d27; color: #94a3b8;
|
||||
transition: border-color 0.15s, color 0.15s;
|
||||
transition: border-color 0.15s, color 0.15s; flex-shrink: 0;
|
||||
}
|
||||
.row-btn.danger { color: #f87171; }
|
||||
.row-btn.danger:hover { border-color: #f87171; }
|
||||
|
||||
/* ── Role assignment rows ── */
|
||||
/* Role assignments */
|
||||
.role-row {
|
||||
display: flex; align-items: flex-start; gap: 1rem;
|
||||
padding: 0.6rem 0; border-bottom: 1px solid #1e2030;
|
||||
}
|
||||
.role-row:last-child { border-bottom: none; }
|
||||
.role-name {
|
||||
font-size: 0.82rem; font-weight: 600; color: #a78bfa;
|
||||
min-width: 6rem; padding-top: 0.45rem;
|
||||
}
|
||||
.role-name { font-size: 0.82rem; font-weight: 600; color: #a78bfa; min-width: 6rem; padding-top: 0.45rem; }
|
||||
.role-slots { display: flex; flex-wrap: wrap; gap: 0.5rem; flex: 1; }
|
||||
.role-slot { display: flex; flex-direction: column; gap: 0.2rem; flex: 1; min-width: 8rem; }
|
||||
.role-slot { display: flex; flex-direction: column; gap: 0.2rem; flex: 1; min-width: 8rem; }
|
||||
.slot-label { font-size: 0.68rem; color: #475569; font-weight: 500; text-transform: uppercase; letter-spacing: 0.04em; }
|
||||
.role-select {
|
||||
padding: 0.4rem 0.6rem; font-size: 0.8rem;
|
||||
@@ -168,36 +187,31 @@
|
||||
color: #e2e8f0; font-family: inherit; cursor: pointer; outline: none;
|
||||
transition: border-color 0.15s;
|
||||
}
|
||||
.role-select:focus { border-color: #7c3aed; }
|
||||
.role-select:focus { border-color: #7c3aed; }
|
||||
.role-select.saved { border-color: #166534; }
|
||||
.role-select.saving { border-color: #92400e; }
|
||||
.role-select.err { border-color: #7f1d1d; }
|
||||
|
||||
/* ── Add model section ── */
|
||||
#add-section .field-row { margin-bottom: 0.5rem; }
|
||||
/* Model select picker */
|
||||
#model-select-wrap { display: none; margin-bottom: 0.75rem; }
|
||||
.tags-hint { font-size: 0.72rem; color: #475569; margin-top: 0.3rem; }
|
||||
|
||||
/* ── Messages ── */
|
||||
.msg {
|
||||
font-size: 0.85rem; text-align: center;
|
||||
padding: 0.6rem 1rem; border-radius: 6px; margin-bottom: 1rem;
|
||||
}
|
||||
/* Messages & Toast */
|
||||
.msg { font-size: 0.85rem; text-align: center; padding: 0.6rem 1rem; border-radius: 6px; margin-bottom: 1rem; }
|
||||
.msg.success { color: #4ade80; background: #052e16; border: 1px solid #166534; }
|
||||
.msg.error { color: #f87171; background: #2d0a0a; border: 1px solid #7f1d1d; }
|
||||
|
||||
/* ── Toast ── */
|
||||
#toast {
|
||||
position: fixed; bottom: 1.5rem; right: 1.5rem;
|
||||
background: #1a1d27; border: 1px solid #166534; color: #4ade80;
|
||||
padding: 0.5rem 1rem; border-radius: 6px; font-size: 0.82rem;
|
||||
opacity: 0; transition: opacity 0.2s; pointer-events: none;
|
||||
z-index: 100;
|
||||
opacity: 0; transition: opacity 0.2s; pointer-events: none; z-index: 100;
|
||||
}
|
||||
#toast.show { opacity: 1; }
|
||||
#toast.err { border-color: #7f1d1d; color: #f87171; }
|
||||
#toast.err { border-color: #7f1d1d; color: #f87171; }
|
||||
|
||||
.empty-note { font-size: 0.85rem; color: #475569; padding: 0.3rem 0; }
|
||||
details summary { font-size: 0.82rem; color: #64748b; cursor: pointer; user-select: none; margin-top: 0.75rem; }
|
||||
details > div { margin-top: 0.75rem; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@@ -206,53 +220,104 @@
|
||||
<a href="/" class="nav-link">← Chat</a>
|
||||
<a href="/help" class="nav-link">Help</a>
|
||||
<a href="/settings" class="nav-link">Settings</a>
|
||||
<a href="/settings/local" class="nav-link active">Models</a>
|
||||
<a href="/settings/models" class="nav-link active">Models</a>
|
||||
<span class="nav-spacer"></span>
|
||||
<a href="/logout" class="nav-link nav-logout">Sign out</a>
|
||||
</nav>
|
||||
|
||||
<div class="page-header">
|
||||
<h1>Model Registry</h1>
|
||||
<p>Configure hosts, models, and which model handles each task type.</p>
|
||||
<p>Configure providers, hosts, and model assignments.</p>
|
||||
</div>
|
||||
|
||||
<!-- SUCCESS -->
|
||||
<!-- ERROR -->
|
||||
<!-- SUCCESS --><!-- ERROR -->
|
||||
|
||||
<!-- ── Hosts ── -->
|
||||
<!-- ── Cloud Providers ── -->
|
||||
<div class="section">
|
||||
<h2>Hosts</h2>
|
||||
<p class="section-note">OpenAI-compatible API servers (Open WebUI, Ollama, LM Studio, etc.)</p>
|
||||
<h2>Cloud Providers</h2>
|
||||
|
||||
<div class="provider-block">
|
||||
<div class="provider-header">
|
||||
<div class="provider-icon pi-anthropic">A</div>
|
||||
<div>
|
||||
<div class="provider-title">Anthropic</div>
|
||||
<div class="provider-subtitle">Claude via CLI (OAuth) — no API key needed</div>
|
||||
</div>
|
||||
</div>
|
||||
<p class="section-note" style="margin-bottom:0">
|
||||
Claude models are accessed through the Claude CLI using your existing OAuth login.
|
||||
Run <code style="font-family:monospace;color:#94a3b8">claude auth login</code> to authenticate.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="provider-block" style="border-top:1px solid #2d3148; padding-top:1.25rem">
|
||||
<div class="provider-header">
|
||||
<div class="provider-icon pi-google">G</div>
|
||||
<div>
|
||||
<div class="provider-title">Google</div>
|
||||
<div class="provider-subtitle">Gemini models via Gemini API</div>
|
||||
</div>
|
||||
</div>
|
||||
{{ google_account_rows }}
|
||||
<details>
|
||||
<summary>+ Add Google account</summary>
|
||||
<div>
|
||||
<form method="POST" action="/settings/local/google-account">
|
||||
<input type="hidden" name="account_id" value="">
|
||||
<div class="field-row">
|
||||
<div class="field">
|
||||
<label>Label <span style="color:#475569;font-weight:400">(e.g. Work, Personal)</span></label>
|
||||
<input type="text" name="label" placeholder="One Sky IT"
|
||||
autocomplete="off" data-form-type="other">
|
||||
</div>
|
||||
<div class="field" style="flex:2">
|
||||
<label>API Key</label>
|
||||
<input type="password" name="api_key" placeholder="AIza…"
|
||||
autocomplete="new-password" data-1p-ignore data-lpignore="true"
|
||||
data-form-type="other">
|
||||
</div>
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button type="submit" class="btn btn-primary btn-sm">Add Account</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</details>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ── Local Hosts ── -->
|
||||
<div class="section">
|
||||
<h2>Local Hosts</h2>
|
||||
<p class="section-note">OpenAI-compatible API servers (Open WebUI, Ollama, LM Studio, OpenRouter, etc.)</p>
|
||||
{{ host_rows }}
|
||||
<details style="margin-top:0.75rem">
|
||||
<summary style="font-size:0.82rem; color:#64748b; cursor:pointer; user-select:none">+ Add host</summary>
|
||||
<div style="margin-top:0.75rem">
|
||||
<details>
|
||||
<summary>+ Add host</summary>
|
||||
<div>
|
||||
<form method="POST" action="/settings/local/host">
|
||||
<input type="hidden" name="host_id" value="">
|
||||
<div class="field-row">
|
||||
<div class="field">
|
||||
<label for="new-host-label">Label</label>
|
||||
<input type="text" id="new-host-label" name="label"
|
||||
placeholder="e.g. Gaming Laptop"
|
||||
<label>Label</label>
|
||||
<input type="text" name="label" placeholder="Gaming Laptop"
|
||||
autocomplete="off" data-form-type="other">
|
||||
</div>
|
||||
<div class="field" style="flex:2">
|
||||
<label for="new-host-url">API URL</label>
|
||||
<input type="text" id="new-host-url" name="api_url"
|
||||
placeholder="http://192.168.x.x:3000"
|
||||
<label>API URL</label>
|
||||
<input type="text" name="api_url" placeholder="http://192.168.x.x:3000"
|
||||
autocomplete="off" spellcheck="false" data-form-type="other">
|
||||
</div>
|
||||
</div>
|
||||
<div class="field-row">
|
||||
<div class="field">
|
||||
<label for="new-host-key">API Key</label>
|
||||
<input type="password" id="new-host-key" name="api_key"
|
||||
placeholder="sk-… (leave blank if not required)"
|
||||
autocomplete="new-password" data-1p-ignore data-lpignore="true" data-form-type="other">
|
||||
<label>API Key</label>
|
||||
<input type="password" name="api_key" placeholder="sk-… (leave blank if not required)"
|
||||
autocomplete="new-password" data-1p-ignore data-lpignore="true"
|
||||
data-form-type="other">
|
||||
</div>
|
||||
<div class="field" style="flex:0 0 auto">
|
||||
<label for="new-host-type">Type</label>
|
||||
<select id="new-host-type" name="host_type">
|
||||
<label>Type</label>
|
||||
<select name="host_type">
|
||||
<option value="openwebui">Open WebUI / Ollama</option>
|
||||
<option value="openai">OpenAI-compatible (OpenRouter, etc.)</option>
|
||||
</select>
|
||||
@@ -273,57 +338,92 @@
|
||||
</div>
|
||||
|
||||
<!-- ── Add Model ── -->
|
||||
<div class="section" id="add-section"{{ add_model_hidden }}>
|
||||
<div class="section">
|
||||
<h2>Add Model</h2>
|
||||
<div id="model-select-wrap">
|
||||
<div class="field">
|
||||
<label for="model-picker">Available on host</label>
|
||||
<select id="model-picker">
|
||||
<option value="">— select to auto-fill —</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="ptabs" id="provider-tabs">
|
||||
<button type="button" class="ptab active" data-p="local">Local</button>
|
||||
<button type="button" class="ptab" data-p="google">Google</button>
|
||||
<button type="button" class="ptab" data-p="anthropic">Anthropic</button>
|
||||
</div>
|
||||
|
||||
<form method="POST" action="/settings/local/models/add" id="add-form">
|
||||
<input type="hidden" name="host_id" id="add-host-id" value="">
|
||||
<div class="field">
|
||||
<label for="add-host-select">Host</label>
|
||||
<select id="add-host-select" onchange="document.getElementById('add-host-id').value=this.value">
|
||||
{{ host_options }}
|
||||
</select>
|
||||
<input type="hidden" name="provider" id="add-provider-val" value="local">
|
||||
|
||||
<!-- LOCAL fields -->
|
||||
<div id="pf-local">
|
||||
<div id="model-select-wrap">
|
||||
<div class="field">
|
||||
<label>Available on host</label>
|
||||
<select id="model-picker">
|
||||
<option value="">— select to auto-fill —</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="field-row">
|
||||
<div class="field" style="flex:0 0 auto">
|
||||
<label>Host</label>
|
||||
<select id="add-host-select" name="host_id"></select>
|
||||
</div>
|
||||
<div class="field" style="flex:2">
|
||||
<label>Model name / ID</label>
|
||||
<input type="text" id="add-model-name" name="model_name"
|
||||
placeholder="e.g. gemma4:e4b"
|
||||
autocomplete="off" spellcheck="false" data-form-type="other">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="field-row">
|
||||
|
||||
<!-- GOOGLE fields -->
|
||||
<div id="pf-google" style="display:none">
|
||||
<div class="field-row">
|
||||
<div class="field">
|
||||
<label>Gemini model</label>
|
||||
<select id="add-gemini-model"></select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Account</label>
|
||||
<select id="add-google-account" name="account_id"></select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ANTHROPIC fields -->
|
||||
<div id="pf-anthropic" style="display:none">
|
||||
<div class="field">
|
||||
<label for="add-label">Label</label>
|
||||
<label>Claude model</label>
|
||||
<select id="add-claude-model"></select>
|
||||
</div>
|
||||
<p class="section-note" style="margin-top:-0.25rem">Uses Claude CLI (OAuth)</p>
|
||||
</div>
|
||||
|
||||
<!-- Hidden: cloud model name (set by JS from catalog pickers) -->
|
||||
<input type="hidden" id="cloud-model-name" name="cloud_model_name" value="">
|
||||
<input type="hidden" name="credential_id" value="cli">
|
||||
|
||||
<!-- Shared fields -->
|
||||
<div class="field-row" style="margin-top:0.75rem">
|
||||
<div class="field">
|
||||
<label>Label</label>
|
||||
<input type="text" id="add-label" name="label"
|
||||
placeholder="e.g. Gemma 4 E4B"
|
||||
autocomplete="off" data-form-type="other">
|
||||
</div>
|
||||
<div class="field" style="flex:2">
|
||||
<label for="add-model-name">Model name</label>
|
||||
<input type="text" id="add-model-name" name="model_name"
|
||||
placeholder="e.g. gemma4:e4b"
|
||||
autocomplete="off" spellcheck="false" data-form-type="other">
|
||||
</div>
|
||||
</div>
|
||||
<div class="field-row">
|
||||
<div class="field" style="flex:0 0 auto">
|
||||
<label for="add-context-k">Context (k tokens)</label>
|
||||
<input type="number" id="add-context-k" name="context_k"
|
||||
value="0" min="0" max="10000">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="add-tags">Tags <span style="color:#475569; font-weight:400">(comma-separated)</span></label>
|
||||
<input type="text" id="add-tags" name="tags"
|
||||
placeholder="fast, distill, coding"
|
||||
autocomplete="off" data-form-type="other">
|
||||
<p class="tags-hint">Informational labels — used for display and future filtering.</p>
|
||||
<label>Context (k tokens)</label>
|
||||
<input type="number" id="add-context-k" name="context_k" value="0" min="0" max="10000">
|
||||
</div>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Tags <span style="color:#475569;font-weight:400">(comma-separated)</span></label>
|
||||
<input type="text" name="tags" placeholder="fast, distill, coding"
|
||||
autocomplete="off" data-form-type="other">
|
||||
<p class="tags-hint">Informational labels — used for display and future filtering.</p>
|
||||
</div>
|
||||
|
||||
<div class="btn-row">
|
||||
<button type="submit" class="btn btn-primary btn-sm">Add Model</button>
|
||||
<button type="button" id="fetch-btn" class="btn btn-secondary btn-sm">
|
||||
Fetch models from host
|
||||
</button>
|
||||
<button type="button" id="fetch-btn" class="btn btn-secondary btn-sm">Fetch from host</button>
|
||||
<span id="fetch-status" class="fetch-status"></span>
|
||||
</div>
|
||||
</form>
|
||||
@@ -333,9 +433,7 @@
|
||||
<div class="section">
|
||||
<h2>Role Assignments</h2>
|
||||
<p class="section-note">
|
||||
Choose which model handles each task type.
|
||||
Backups are tried in order if the primary fails or is unavailable.
|
||||
Leave a slot empty to use the server default (.env).
|
||||
Map each task type to a model. Primary is tried first; backups are used if primary fails or is unavailable.
|
||||
</p>
|
||||
{{ role_rows }}
|
||||
</div>
|
||||
@@ -344,19 +442,14 @@
|
||||
<div id="toast"></div>
|
||||
|
||||
<script>
|
||||
// ── Pre-fill role selects ─────────────────────────────────────────────────
|
||||
const ROLE_DATA = {{ role_data_js }};
|
||||
// ── Injected data ─────────────────────────────────────────────────────────
|
||||
const ROLE_DATA = {{ role_data_js }};
|
||||
const GOOGLE_ACCOUNTS = {{ google_accounts_js }};
|
||||
const GOOGLE_CATALOG = {{ google_catalog_js }};
|
||||
const ANTHROPIC_CATALOG = {{ anthropic_catalog_js }};
|
||||
const HAS_HOSTS = {{ has_hosts }};
|
||||
|
||||
document.querySelectorAll('.role-select').forEach(sel => {
|
||||
const role = sel.dataset.role;
|
||||
const slot = sel.dataset.slot;
|
||||
const val = (ROLE_DATA[role] || {})[slot] || '';
|
||||
for (const opt of sel.options) {
|
||||
if (opt.value === val) { opt.selected = true; break; }
|
||||
}
|
||||
});
|
||||
|
||||
// ── Role select change → AJAX save ───────────────────────────────────────
|
||||
// ── Role selects: pre-fill + AJAX save ────────────────────────────────────
|
||||
const toast = document.getElementById('toast');
|
||||
let toastTimer = null;
|
||||
|
||||
@@ -364,21 +457,22 @@
|
||||
toast.textContent = msg;
|
||||
toast.className = 'show' + (err ? ' err' : '');
|
||||
clearTimeout(toastTimer);
|
||||
toastTimer = setTimeout(() => { toast.className = ''; }, 2000);
|
||||
toastTimer = setTimeout(() => { toast.className = ''; }, 2500);
|
||||
}
|
||||
|
||||
document.querySelectorAll('.role-select').forEach(sel => {
|
||||
const val = (ROLE_DATA[sel.dataset.role] || {})[sel.dataset.slot] || '';
|
||||
for (const opt of sel.options) {
|
||||
if (opt.value === val) { opt.selected = true; break; }
|
||||
}
|
||||
sel.addEventListener('change', async () => {
|
||||
const role = sel.dataset.role;
|
||||
const slot = sel.dataset.slot;
|
||||
const { role, slot } = sel.dataset;
|
||||
const model_id = sel.value || null;
|
||||
|
||||
sel.classList.add('saving');
|
||||
try {
|
||||
const res = await fetch('/api/models/role', {
|
||||
method: 'POST',
|
||||
headers: {'Content-Type': 'application/json'},
|
||||
body: JSON.stringify({role, slot, model_id}),
|
||||
const res = await fetch('/api/models/role', {
|
||||
method: 'POST', headers: {'Content-Type':'application/json'},
|
||||
body: JSON.stringify({role, slot, model_id}),
|
||||
});
|
||||
const data = await res.json();
|
||||
if (data.ok) {
|
||||
@@ -397,27 +491,90 @@
|
||||
});
|
||||
});
|
||||
|
||||
// ── Fetch models from host ────────────────────────────────────────────────
|
||||
// Per-host "Fetch models" buttons in the host rows
|
||||
// ── Provider tabs ─────────────────────────────────────────────────────────
|
||||
const providerVal = document.getElementById('add-provider-val');
|
||||
const pfields = {
|
||||
local: document.getElementById('pf-local'),
|
||||
google: document.getElementById('pf-google'),
|
||||
anthropic: document.getElementById('pf-anthropic'),
|
||||
};
|
||||
const fetchBtn = document.getElementById('fetch-btn');
|
||||
|
||||
document.querySelectorAll('.ptab').forEach(tab => {
|
||||
tab.addEventListener('click', () => {
|
||||
document.querySelectorAll('.ptab').forEach(t => t.classList.remove('active'));
|
||||
tab.classList.add('active');
|
||||
const p = tab.dataset.p;
|
||||
providerVal.value = p;
|
||||
for (const [key, el] of Object.entries(pfields)) {
|
||||
el.style.display = key === p ? '' : 'none';
|
||||
}
|
||||
fetchBtn.style.display = p === 'local' ? '' : 'none';
|
||||
});
|
||||
});
|
||||
|
||||
// ── Populate catalog dropdowns ────────────────────────────────────────────
|
||||
function populateSelect(selEl, items, valKey, labelKey) {
|
||||
selEl.innerHTML = '<option value="">— select —</option>';
|
||||
items.forEach(item => {
|
||||
const opt = document.createElement('option');
|
||||
opt.value = item[valKey];
|
||||
opt.textContent = item[labelKey];
|
||||
opt.dataset.label = item.label || '';
|
||||
opt.dataset.ctx = item.context_k || 0;
|
||||
selEl.appendChild(opt);
|
||||
});
|
||||
}
|
||||
|
||||
const geminiSel = document.getElementById('add-gemini-model');
|
||||
const claudeSel = document.getElementById('add-claude-model');
|
||||
const gAcctSel = document.getElementById('add-google-account');
|
||||
|
||||
populateSelect(geminiSel, GOOGLE_CATALOG, 'id', 'label');
|
||||
populateSelect(claudeSel, ANTHROPIC_CATALOG, 'id', 'label');
|
||||
|
||||
if (GOOGLE_ACCOUNTS.length) {
|
||||
gAcctSel.innerHTML = '<option value="">— select account —</option>';
|
||||
GOOGLE_ACCOUNTS.forEach(a => {
|
||||
const opt = document.createElement('option');
|
||||
opt.value = a.id;
|
||||
opt.textContent = a.label || a.hint;
|
||||
gAcctSel.appendChild(opt);
|
||||
});
|
||||
} else {
|
||||
gAcctSel.innerHTML = '<option value="">No accounts configured — add one above</option>';
|
||||
}
|
||||
|
||||
function onCatalogChange(sel) {
|
||||
const opt = sel.options[sel.selectedIndex];
|
||||
if (!opt.value) return;
|
||||
document.getElementById('cloud-model-name').value = opt.value;
|
||||
document.getElementById('add-context-k').value = opt.dataset.ctx || 0;
|
||||
if (!document.getElementById('add-label').value) {
|
||||
document.getElementById('add-label').value = opt.dataset.label || '';
|
||||
}
|
||||
}
|
||||
geminiSel.addEventListener('change', () => onCatalogChange(geminiSel));
|
||||
claudeSel.addEventListener('change', () => onCatalogChange(claudeSel));
|
||||
|
||||
// ── Host select + fetch (local) ───────────────────────────────────────────
|
||||
const hostSel = document.getElementById('add-host-select');
|
||||
const hostOpts = `{{ host_options }}`;
|
||||
hostSel.innerHTML = hostOpts || '<option value="">No hosts configured</option>';
|
||||
|
||||
// Per-host "Fetch" buttons
|
||||
document.querySelectorAll('.fetch-btn').forEach(btn => {
|
||||
btn.addEventListener('click', () => fetchModels(btn.dataset.hostId, btn));
|
||||
});
|
||||
|
||||
// "Fetch models from host" in Add Model section (uses selected host)
|
||||
const globalFetchBtn = document.getElementById('fetch-btn');
|
||||
if (globalFetchBtn) {
|
||||
globalFetchBtn.addEventListener('click', () => {
|
||||
const hostSel = document.getElementById('add-host-select');
|
||||
const hostId = hostSel ? hostSel.value : '';
|
||||
fetchModels(hostId, globalFetchBtn, true);
|
||||
});
|
||||
}
|
||||
|
||||
async function fetchModels(hostId, btn, fillAddForm = false) {
|
||||
const statusEl = fillAddForm
|
||||
? document.getElementById('fetch-status')
|
||||
: document.getElementById('fetch-' + hostId);
|
||||
fetchBtn.addEventListener('click', () => {
|
||||
const hostId = hostSel ? hostSel.value : '';
|
||||
fetchModels(hostId, fetchBtn, true);
|
||||
});
|
||||
|
||||
async function fetchModels(hostId, btn, fillPicker = false) {
|
||||
const statusEl = fillPicker ? document.getElementById('fetch-status')
|
||||
: document.getElementById('fetch-' + hostId);
|
||||
btn.disabled = true;
|
||||
if (statusEl) { statusEl.textContent = 'Fetching…'; statusEl.className = 'fetch-status'; }
|
||||
|
||||
@@ -425,27 +582,24 @@
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
const data = await res.json();
|
||||
|
||||
if (data.error) {
|
||||
if (statusEl) { statusEl.textContent = '✗ ' + data.error; statusEl.className = 'fetch-status err'; }
|
||||
return;
|
||||
}
|
||||
|
||||
if (fillAddForm) {
|
||||
if (fillPicker) {
|
||||
const picker = document.getElementById('model-picker');
|
||||
const wrap = document.getElementById('model-select-wrap');
|
||||
picker.innerHTML = '<option value="">— select to auto-fill —</option>';
|
||||
for (const m of data.models) {
|
||||
data.models.forEach(m => {
|
||||
const opt = document.createElement('option');
|
||||
opt.value = m.id;
|
||||
opt.textContent = m.name !== m.id ? `${m.name} (${m.id})` : m.id;
|
||||
opt.dataset.id = m.id;
|
||||
opt.value = m.id;
|
||||
opt.textContent = m.name !== m.id ? `${m.name} (${m.id})` : m.id;
|
||||
opt.dataset.id = m.id;
|
||||
opt.dataset.name = m.name;
|
||||
picker.appendChild(opt);
|
||||
}
|
||||
});
|
||||
wrap.style.display = 'block';
|
||||
}
|
||||
|
||||
if (statusEl) {
|
||||
statusEl.textContent = `✓ ${data.models.length} model${data.models.length !== 1 ? 's' : ''}`;
|
||||
statusEl.className = 'fetch-status ok';
|
||||
@@ -457,27 +611,20 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-fill label + model name when a model is selected from the picker
|
||||
const picker = document.getElementById('model-picker');
|
||||
if (picker) {
|
||||
picker.addEventListener('change', () => {
|
||||
const opt = picker.options[picker.selectedIndex];
|
||||
if (!opt.value) return;
|
||||
const nameInput = document.getElementById('add-model-name');
|
||||
const labelInput = document.getElementById('add-label');
|
||||
nameInput.value = opt.dataset.id || opt.value;
|
||||
labelInput.value = (opt.dataset.name && opt.dataset.name !== opt.dataset.id)
|
||||
? opt.dataset.name : '';
|
||||
nameInput.focus();
|
||||
});
|
||||
}
|
||||
picker.addEventListener('change', () => {
|
||||
const opt = picker.options[picker.selectedIndex];
|
||||
if (!opt.value) return;
|
||||
document.getElementById('add-model-name').value = opt.dataset.id || opt.value;
|
||||
if (!document.getElementById('add-label').value) {
|
||||
const n = opt.dataset.name;
|
||||
document.getElementById('add-label').value = (n && n !== opt.dataset.id) ? n : '';
|
||||
}
|
||||
document.getElementById('add-model-name').focus();
|
||||
});
|
||||
|
||||
// Sync hidden host_id input from the visible select
|
||||
const addHostSel = document.getElementById('add-host-select');
|
||||
const addHostId = document.getElementById('add-host-id');
|
||||
if (addHostSel && addHostId) {
|
||||
addHostId.value = addHostSel.value;
|
||||
}
|
||||
// Hide fetch button initially if no hosts
|
||||
if (!HAS_HOSTS) fetchBtn.style.display = 'none';
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -269,44 +269,36 @@
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- Gemini API key -->
|
||||
<!-- Browser cache -->
|
||||
<div class="section">
|
||||
<h2>Gemini API Key</h2>
|
||||
<h2>Browser Cache</h2>
|
||||
<p style="font-size:0.8rem; color:#94a3b8; margin-bottom:0.85rem; line-height:1.55;">
|
||||
Paste your personal key from
|
||||
<a href="https://aistudio.google.com/apikey" target="_blank" rel="noopener"
|
||||
style="color:#a78bfa;">aistudio.google.com/apikey</a>
|
||||
to use your own Gemini quota. Leave blank to use the shared server key.
|
||||
</p>
|
||||
<form method="POST" action="/settings/gemini-key">
|
||||
<div class="field">
|
||||
<label for="gemini_api_key">API Key</label>
|
||||
<input type="text" id="gemini_api_key" name="gemini_api_key"
|
||||
placeholder="{{ gemini_key_hint }}"
|
||||
autocomplete="new-password" spellcheck="false"
|
||||
data-1p-ignore data-lpignore="true" data-form-type="other">
|
||||
</div>
|
||||
<button type="submit">Save Key</button>
|
||||
</form>
|
||||
<p id="gemini-key-status" style="font-size:0.75rem; color:#94a3b8; margin-top:0.5rem;">
|
||||
Current: {{ gemini_key_hint }}
|
||||
<span id="gemini-remove-wrap" style="{{ gemini_key_set == 'false' and 'display:none' or '' }}">
|
||||
— <a href="#" id="gemini-remove-link" style="color:#f87171;">remove</a>
|
||||
</span>
|
||||
Clears UI preferences stored in this browser: active mode, session ID, memory toggles,
|
||||
theme, font size, and context tier. Does not sign you out.
|
||||
</p>
|
||||
<button type="button" id="clear-ls-btn"
|
||||
style="padding:0.5rem 1rem; background:none; border:1px solid #2d3148; border-radius:6px;
|
||||
color:#94a3b8; font-size:0.88rem; font-weight:500; cursor:pointer;
|
||||
transition:border-color 0.15s, color 0.15s;">
|
||||
Clear browser cache
|
||||
</button>
|
||||
<span id="clear-ls-ok" style="display:none; margin-left:0.75rem; font-size:0.8rem; color:#4ade80;">
|
||||
Cleared.
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<!-- Local models link -->
|
||||
<!-- Model Registry link -->
|
||||
<div class="section">
|
||||
<h2>Local Models</h2>
|
||||
<h2>Model Registry</h2>
|
||||
<p style="font-size:0.8rem; color:#94a3b8; margin-bottom:0.85rem; line-height:1.55;">
|
||||
Configure OpenAI-compatible hosts and models (Open WebUI, Ollama, LM Studio, etc.).
|
||||
Configure AI providers (Anthropic, Google), local hosts (Open WebUI, Ollama, OpenRouter, etc.),
|
||||
and assign models to roles — chat, orchestrator, distill, and more.
|
||||
</p>
|
||||
<a href="/settings/local"
|
||||
<a href="/settings/models"
|
||||
style="display:inline-block; padding:0.55rem 1rem; background:#7c3aed; border-radius:6px;
|
||||
color:#fff; font-size:0.88rem; font-weight:600; text-decoration:none;
|
||||
transition:background 0.15s;">
|
||||
Manage local models →
|
||||
Manage models →
|
||||
</a>
|
||||
</div>
|
||||
|
||||
@@ -375,6 +367,12 @@
|
||||
});
|
||||
}
|
||||
|
||||
// Clear localStorage (keeps JWT cookie — no sign-out)
|
||||
document.getElementById('clear-ls-btn').addEventListener('click', () => {
|
||||
localStorage.clear();
|
||||
document.getElementById('clear-ls-ok').style.display = 'inline';
|
||||
});
|
||||
|
||||
// Persona rename toggle
|
||||
document.querySelectorAll('.persona-rename-toggle').forEach(btn => {
|
||||
btn.addEventListener('click', () => {
|
||||
|
||||
@@ -21,6 +21,9 @@
|
||||
--pre-bg: rgba(0,0,0,0.35);
|
||||
--success: #6abf6a;
|
||||
--success-dim: #2a4a2a;
|
||||
--amber: #f59e0b;
|
||||
--amber-border: #92400e;
|
||||
--amber-glow: rgba(245,158,11,0.35);
|
||||
}
|
||||
|
||||
/* ── Light theme ─────────────────────────────────────────── */
|
||||
@@ -45,6 +48,9 @@
|
||||
--pre-bg: rgba(0,0,0,0.07);
|
||||
--success: #1e6e1e;
|
||||
--success-dim: #5aaa5a;
|
||||
--amber: #b45309;
|
||||
--amber-border: #92400e;
|
||||
--amber-glow: rgba(180,83,9,0.25);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,6 +75,9 @@
|
||||
--pre-bg: rgba(0,0,0,0.35);
|
||||
--success: #6abf6a;
|
||||
--success-dim: #2a4a2a;
|
||||
--amber: #f59e0b;
|
||||
--amber-border: #92400e;
|
||||
--amber-glow: rgba(245,158,11,0.35);
|
||||
}
|
||||
|
||||
[data-theme="light"] {
|
||||
@@ -91,6 +100,9 @@
|
||||
--pre-bg: rgba(0,0,0,0.07);
|
||||
--success: #1e6e1e;
|
||||
--success-dim: #5aaa5a;
|
||||
--amber: #b45309;
|
||||
--amber-border: #92400e;
|
||||
--amber-glow: rgba(180,83,9,0.25);
|
||||
}
|
||||
|
||||
body {
|
||||
@@ -223,7 +235,7 @@
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 4px 16px rgba(0,0,0,0.4);
|
||||
box-shadow: 0 8px 24px var(--shadow);
|
||||
z-index: 200;
|
||||
overflow: hidden;
|
||||
}
|
||||
@@ -640,6 +652,16 @@
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
/* S: collapse to a single row — mode button + compact tools toggle */
|
||||
#mode-select[data-size="s"] {
|
||||
flex-direction: row;
|
||||
align-items: center;
|
||||
}
|
||||
#mode-select[data-size="s"] #tools-toggle {
|
||||
padding: 3px 7px;
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
|
||||
#mode-select-btn {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
@@ -654,10 +676,9 @@
|
||||
white-space: nowrap;
|
||||
transition: border-color 0.15s, color 0.15s;
|
||||
}
|
||||
#mode-select-btn:hover { border-color: var(--muted); color: var(--text); }
|
||||
#mode-select-btn.mode-note { border-color: rgba(180,130,40,0.6); color: #c9a84c; }
|
||||
#mode-select-btn.mode-otr { border-color: rgba(120,80,160,0.6); color: #a87fd4; }
|
||||
#mode-select-btn.mode-agent { border-color: rgba(80,140,200,0.6); color: #7cb9e8; }
|
||||
#mode-select-btn:hover { border-color: var(--muted); color: var(--text); }
|
||||
#mode-select-btn.mode-note { border-color: rgba(180,130,40,0.6); color: #c9a84c; }
|
||||
#mode-select-btn.mode-otr { border-color: rgba(120,80,160,0.6); color: #a87fd4; }
|
||||
|
||||
#mode-icon { display: flex; align-items: center; }
|
||||
.mode-arrow { font-size: 0.55rem; color: var(--muted); margin-left: 2px; opacity: 0.5; }
|
||||
@@ -716,6 +737,26 @@
|
||||
color: rgba(40,170,150,0.75);
|
||||
}
|
||||
|
||||
/* Tools toggle — OFF: dim/muted; ON: amber with glow */
|
||||
#tools-toggle {
|
||||
background: var(--bg);
|
||||
border: 1px solid rgba(255,255,255,0.1);
|
||||
border-radius: 6px;
|
||||
color: rgba(255,255,255,0.2);
|
||||
font-size: 0.85rem;
|
||||
padding: 4px 8px;
|
||||
cursor: pointer;
|
||||
text-align: center;
|
||||
transition: color 0.15s, border-color 0.15s, box-shadow 0.15s;
|
||||
}
|
||||
#tools-toggle:hover { color: rgba(255,255,255,0.4); border-color: rgba(255,255,255,0.2); }
|
||||
#tools-toggle.local-on {
|
||||
color: var(--amber);
|
||||
border-color: var(--amber-border);
|
||||
box-shadow: 0 0 6px var(--amber-glow);
|
||||
}
|
||||
#tools-toggle.local-on:hover { box-shadow: 0 0 10px var(--amber-glow); }
|
||||
|
||||
#input {
|
||||
flex: 1;
|
||||
background: var(--bg);
|
||||
@@ -737,8 +778,7 @@
|
||||
#input.mode-note:focus { border-color: rgba(180,130,40,0.85); }
|
||||
#input.mode-note.public { border-color: rgba(40,170,150,0.55); }
|
||||
#input.mode-note.public:focus { border-color: rgba(40,170,150,0.85); }
|
||||
#input.mode-otr { border-color: rgba(120,80,160,0.4); background: rgba(120,80,160,0.04); }
|
||||
#input.mode-agent { border-color: rgba(80,140,200,0.4); }
|
||||
#input.mode-otr { border-color: rgba(120,80,160,0.4); background: rgba(120,80,160,0.04); }
|
||||
|
||||
/* Send column — right side, stacked */
|
||||
#send-col {
|
||||
@@ -1138,7 +1178,7 @@
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
z-index: 100;
|
||||
z-index: 200;
|
||||
box-shadow: 0 8px 24px var(--shadow);
|
||||
overflow: hidden;
|
||||
}
|
||||
@@ -1178,9 +1218,9 @@
|
||||
.ctx-btn:hover { color: var(--text); border-color: var(--muted); }
|
||||
.ctx-btn.active { color: var(--accent); border-color: var(--accent); }
|
||||
.ctx-btn.mem-on { color: var(--success); border-color: var(--success-dim); }
|
||||
.ctx-btn.local-on { color: #f59e0b; border-color: #92400e; }
|
||||
.ctx-btn.local-on { color: var(--amber); border-color: var(--amber-border); }
|
||||
#backend-model-hint {
|
||||
font-size: 0.68rem; color: #f59e0b; opacity: 0.8;
|
||||
font-size: 0.68rem; color: var(--amber); opacity: 0.9;
|
||||
margin-top: 4px; word-break: break-all; line-height: 1.3;
|
||||
}
|
||||
|
||||
@@ -1497,17 +1537,10 @@
|
||||
font-size: 16px; /* prevent iOS Safari auto-zoom */
|
||||
}
|
||||
|
||||
/* Mode select: row layout (btn left, note-vis right) */
|
||||
#mode-select {
|
||||
flex-direction: row;
|
||||
flex: 1;
|
||||
align-items: center;
|
||||
}
|
||||
/* Mode select: grows to fill left side of bottom row; back to row on mobile */
|
||||
#mode-select { flex: 1; flex-direction: row; align-items: center; }
|
||||
#mode-select-btn { flex: 1; justify-content: center; }
|
||||
|
||||
/* Note vis button sits to the right of the mode btn on mobile */
|
||||
#note-vis-btn { margin-top: 0; }
|
||||
|
||||
/* Dropdown still opens upward on mobile */
|
||||
#mode-dropdown { min-width: 140px; }
|
||||
|
||||
@@ -1579,3 +1612,4 @@
|
||||
.header-emoji { font-size: 1.3rem; }
|
||||
.hdr-btn { padding: 5px 8px; }
|
||||
}
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ def test_empty_registry_no_files(tmp_path):
|
||||
import model_registry as reg
|
||||
with patch.object(config.settings, "home_dir", home):
|
||||
data = reg._load("scott")
|
||||
assert data["version"] == 1
|
||||
assert data["version"] == 2
|
||||
assert data["hosts"] == []
|
||||
assert data["models"] == []
|
||||
assert data["roles"] == {}
|
||||
@@ -244,7 +244,7 @@ def test_migration_saves_registry_file(tmp_path):
|
||||
data2 = reg._load("scott")
|
||||
|
||||
assert (home / "scott" / "model_registry.json").exists()
|
||||
assert data2["version"] == 1
|
||||
assert data2["version"] == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -17,10 +17,15 @@ IMPORTANT: These tools are separate from the ae_* MCP tools used by the fleet ag
|
||||
from google.genai import types
|
||||
from tools.web import search as _web_search
|
||||
from tools.ae_knowledge import journal_search as _ae_journal_search
|
||||
from tools.ae_knowledge import journal_list as _ae_journal_list
|
||||
from tools.ae_knowledge import journal_entry_create as _ae_journal_entry_create
|
||||
from tools.ae_knowledge import journal_entry_update as _ae_journal_entry_update
|
||||
from tools.ae_knowledge import journal_entry_disable as _ae_journal_entry_disable
|
||||
from tools.ae_knowledge import journal_entry_append as _ae_journal_entry_append
|
||||
from tools.ae_knowledge import journal_entry_prepend as _ae_journal_entry_prepend
|
||||
from tools.ae_tasks import task_list as _ae_task_list
|
||||
from tools.files import file_read as _file_read
|
||||
from tools.system import claude_allow_dir as _claude_allow_dir
|
||||
from tools.system import claude_allow_dir as _claude_allow_dir, shell_exec as _shell_exec
|
||||
from tools.tasks import task_list as _task_list, task_create as _task_create
|
||||
from tools.tasks import task_update as _task_update, task_complete as _task_complete
|
||||
from tools.cron import (
|
||||
@@ -68,6 +73,17 @@ _web_search_declaration = types.FunctionDeclaration(
|
||||
),
|
||||
)
|
||||
|
||||
_ae_journal_list_declaration = types.FunctionDeclaration(
|
||||
name="ae_journal_list",
|
||||
description=(
|
||||
"List all Aether Journals available for this account. "
|
||||
"Returns each journal's name and id_random. "
|
||||
"Call this first when you need to write a new entry or scope a search to a specific journal "
|
||||
"and don't already know the journal's id."
|
||||
),
|
||||
parameters=types.Schema(type=types.Type.OBJECT, properties={}),
|
||||
)
|
||||
|
||||
_ae_journal_search_declaration = types.FunctionDeclaration(
|
||||
name="ae_journal_search",
|
||||
description=(
|
||||
@@ -136,6 +152,79 @@ _ae_journal_entry_create_declaration = types.FunctionDeclaration(
|
||||
),
|
||||
)
|
||||
|
||||
_ae_journal_entry_update_declaration = types.FunctionDeclaration(
|
||||
name="ae_journal_entry_update",
|
||||
description=(
|
||||
"Update fields on an existing journal entry. Only the fields you provide are changed — "
|
||||
"omitted fields are left as-is. Use ae_journal_search to find the entry_id first. "
|
||||
"To soft-delete, use ae_journal_entry_disable instead."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
properties={
|
||||
"entry_id": types.Schema(type=types.Type.STRING, description="Journal entry id_random"),
|
||||
"title": types.Schema(type=types.Type.STRING, description="New title"),
|
||||
"content": types.Schema(type=types.Type.STRING, description="Replacement content (full, markdown supported)"),
|
||||
"summary": types.Schema(type=types.Type.STRING, description="New summary"),
|
||||
"tags": types.Schema(type=types.Type.STRING, description="Replacement comma-separated tags"),
|
||||
"enable": types.Schema(type=types.Type.BOOLEAN, description="Set false to hide/disable the entry"),
|
||||
},
|
||||
required=["entry_id"],
|
||||
),
|
||||
)
|
||||
|
||||
_ae_journal_entry_disable_declaration = types.FunctionDeclaration(
|
||||
name="ae_journal_entry_disable",
|
||||
description=(
|
||||
"Soft-delete a journal entry by setting enable=false. "
|
||||
"The entry is hidden but not permanently removed. "
|
||||
"Use ae_journal_search to find the entry_id first."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
properties={
|
||||
"entry_id": types.Schema(type=types.Type.STRING, description="Journal entry id_random"),
|
||||
},
|
||||
required=["entry_id"],
|
||||
),
|
||||
)
|
||||
|
||||
_ae_journal_entry_append_declaration = types.FunctionDeclaration(
|
||||
name="ae_journal_entry_append",
|
||||
description=(
|
||||
"Append a new section to the bottom of a journal entry's content. "
|
||||
"Each section gets a UTC timestamp heading unless you provide one. "
|
||||
"Ideal for timestamped logs, running notes, or data logs."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
properties={
|
||||
"entry_id": types.Schema(type=types.Type.STRING, description="Journal entry id_random"),
|
||||
"content": types.Schema(type=types.Type.STRING, description="The text to append (markdown supported)"),
|
||||
"heading": types.Schema(type=types.Type.STRING, description="Optional section heading (defaults to current UTC timestamp)"),
|
||||
},
|
||||
required=["entry_id", "content"],
|
||||
),
|
||||
)
|
||||
|
||||
_ae_journal_entry_prepend_declaration = types.FunctionDeclaration(
|
||||
name="ae_journal_entry_prepend",
|
||||
description=(
|
||||
"Prepend a new section to the top of a journal entry's content. "
|
||||
"Each section gets a UTC timestamp heading unless you provide one. "
|
||||
"Useful for most-recent-first logs."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
properties={
|
||||
"entry_id": types.Schema(type=types.Type.STRING, description="Journal entry id_random"),
|
||||
"content": types.Schema(type=types.Type.STRING, description="The text to prepend (markdown supported)"),
|
||||
"heading": types.Schema(type=types.Type.STRING, description="Optional section heading (defaults to current UTC timestamp)"),
|
||||
},
|
||||
required=["entry_id", "content"],
|
||||
),
|
||||
)
|
||||
|
||||
_ae_task_list_declaration = types.FunctionDeclaration(
|
||||
name="ae_task_list",
|
||||
description=(
|
||||
@@ -187,11 +276,17 @@ _file_read_declaration = types.FunctionDeclaration(
|
||||
|
||||
_CALLABLES: dict[str, callable] = {
|
||||
"web_search": _web_search,
|
||||
"ae_journal_list": _ae_journal_list,
|
||||
"ae_journal_search": _ae_journal_search,
|
||||
"ae_journal_entry_create": _ae_journal_entry_create,
|
||||
"ae_journal_entry_update": _ae_journal_entry_update,
|
||||
"ae_journal_entry_disable": _ae_journal_entry_disable,
|
||||
"ae_journal_entry_append": _ae_journal_entry_append,
|
||||
"ae_journal_entry_prepend": _ae_journal_entry_prepend,
|
||||
"ae_task_list": _ae_task_list,
|
||||
"file_read": _file_read,
|
||||
"claude_allow_dir": _claude_allow_dir,
|
||||
"shell_exec": _shell_exec,
|
||||
"task_list": _task_list,
|
||||
"task_create": _task_create,
|
||||
"task_update": _task_update,
|
||||
@@ -236,6 +331,35 @@ _claude_allow_dir_declaration = types.FunctionDeclaration(
|
||||
),
|
||||
)
|
||||
|
||||
_shell_exec_declaration = types.FunctionDeclaration(
|
||||
name="shell_exec",
|
||||
description=(
|
||||
"Execute a shell command on the Cortex host machine and return its output. "
|
||||
"Use for system diagnostics: disk usage (df -h), process status (ps aux), "
|
||||
"directory listings (ls), memory (free -h), uptime, network info, log tails, etc. "
|
||||
"Commands run as the Cortex service user. Timeout enforced (default 30s, max 120s). "
|
||||
"Avoid destructive commands — prefer read-only system queries."
|
||||
),
|
||||
parameters=types.Schema(
|
||||
type=types.Type.OBJECT,
|
||||
properties={
|
||||
"command": types.Schema(
|
||||
type=types.Type.STRING,
|
||||
description="Shell command to run (e.g. 'df -h', 'ls ~/agents_sync/', 'journalctl --user -u cortex -n 50')",
|
||||
),
|
||||
"working_dir": types.Schema(
|
||||
type=types.Type.STRING,
|
||||
description="Optional working directory (e.g. '~/agents_sync/projects'). Defaults to home directory.",
|
||||
),
|
||||
"timeout": types.Schema(
|
||||
type=types.Type.INTEGER,
|
||||
description="Timeout in seconds (default 30, max 120)",
|
||||
),
|
||||
},
|
||||
required=["command"],
|
||||
),
|
||||
)
|
||||
|
||||
_task_list_declaration = types.FunctionDeclaration(
|
||||
name="task_list",
|
||||
description=(
|
||||
@@ -521,11 +645,17 @@ _scratch_clear_declaration = types.FunctionDeclaration(
|
||||
TOOL_DECLARATIONS = [
|
||||
types.Tool(function_declarations=[
|
||||
_web_search_declaration,
|
||||
_ae_journal_list_declaration,
|
||||
_ae_journal_search_declaration,
|
||||
_ae_journal_entry_create_declaration,
|
||||
_ae_journal_entry_update_declaration,
|
||||
_ae_journal_entry_disable_declaration,
|
||||
_ae_journal_entry_append_declaration,
|
||||
_ae_journal_entry_prepend_declaration,
|
||||
_ae_task_list_declaration,
|
||||
_file_read_declaration,
|
||||
_claude_allow_dir_declaration,
|
||||
_shell_exec_declaration,
|
||||
_task_list_declaration,
|
||||
_task_create_declaration,
|
||||
_task_update_declaration,
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""
|
||||
Aether Platform knowledge tools — journal search and entry creation.
|
||||
Aether Platform knowledge tools — journal search, listing, and entry management.
|
||||
|
||||
These tools give the orchestrator read/write access to the AE Journals module,
|
||||
which serves as the primary long-term knowledge base.
|
||||
|
||||
Auth: x-aether-api-key + x-account-id headers (same pattern as agents_sync scripts).
|
||||
API: V3 CRUD — POST /v3/crud/journal_entry/search, POST /v3/crud/journal/{id}/journal_entry/
|
||||
PATCH /v3/crud/journal_entry/{entry_id}, GET /v3/crud/journal_entry/{entry_id}
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -112,6 +113,52 @@ def _sync_journal_search(query: str, journal_id: str | None, max_results: int) -
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool: ae_journal_list
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def journal_list() -> str:
    """Return a formatted listing of the journals visible to the configured AE account.

    If ``_check_config()`` reports a problem, that message is returned as-is.
    Otherwise the blocking HTTP work is delegated to ``_sync_journal_list`` on
    a worker thread so the event loop is not blocked.
    """
    config_error = _check_config()
    if not config_error:
        return await asyncio.to_thread(_sync_journal_list)
    return config_error
|
||||
|
||||
|
||||
def _sync_journal_list() -> str:
    """Blocking worker for ``journal_list``.

    POSTs to the journal search endpoint (page size 100) and renders each
    journal as a markdown bullet with its name, id and optional description.
    Request/parse failures are logged and returned as an error string.
    """
    import requests

    endpoint = f"{settings.ae_api_url}/v3/crud/journal/search"
    try:
        response = requests.post(
            endpoint,
            headers=_headers(),
            json={"page_size": 100},
            timeout=settings.ae_api_timeout,
        )
        response.raise_for_status()
        body = response.json()
    except Exception as e:
        logger.warning("ae_journal_list failed: %s", e)
        return f"Journal list error: {e}"

    journals = body.get("data", [])
    if not journals:
        return "No journals found for this account."

    def _render(journal) -> str:
        # The id may be exposed under several keys; take the first non-empty one.
        journal_id = (
            journal.get("journal_id")
            or journal.get("id_random")
            or journal.get("id")
            or "?"
        )
        label = journal.get("name") or "(untitled)"
        description = journal.get("description") or ""
        bullet = f"- **{label}** — id: `{journal_id}`"
        return f"{bullet}\n  {description}" if description else bullet

    header = f"Journals ({len(journals)}):\n"
    return "\n".join([header, *(_render(journal) for journal in journals)])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool: ae_journal_entry_create
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -170,8 +217,170 @@ def _sync_journal_entry_create(
|
||||
return f"Journal entry creation error: {e}"
|
||||
|
||||
entry_id = (
|
||||
result.get("data", {}).get("id_random")
|
||||
result.get("data", {}).get("journal_entry_id")
|
||||
or result.get("data", {}).get("id_random")
|
||||
or result.get("id_random")
|
||||
or "unknown"
|
||||
)
|
||||
return f"Journal entry created. id: `{entry_id}`, title: \"{title}\", journal: `{journal_id}`"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared helper: fetch a single journal entry by id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _get_entry(entry_id: str) -> dict | str:
    """Fetch one journal entry by id.

    Returns the entry as a dict on success. On any request/parse failure, or
    when the payload is not a dict, returns a human-readable error string
    instead (callers distinguish the two with ``isinstance(..., str)``).
    """
    import requests

    endpoint = f"{settings.ae_api_url}/v3/crud/journal_entry/{entry_id}"
    try:
        response = requests.get(
            endpoint, headers=_headers(), timeout=settings.ae_api_timeout
        )
        response.raise_for_status()
        body = response.json()
        # Prefer the "data" envelope when present; otherwise use the body itself.
        entry = body.get("data") or body
    except Exception as e:
        logger.warning("_get_entry %s failed: %s", entry_id, e)
        return f"Error fetching entry {entry_id}: {e}"

    if isinstance(entry, dict):
        return entry
    return f"Unexpected response shape for entry {entry_id}"
|
||||
|
||||
|
||||
def _patch_entry(entry_id: str, payload: dict) -> str:
    """Send a PATCH with ``payload`` for the given journal entry.

    Returns the literal string ``"ok"`` when the request succeeds, otherwise
    an error message describing the failure (which is also logged).
    """
    import requests

    endpoint = f"{settings.ae_api_url}/v3/crud/journal_entry/{entry_id}"
    try:
        response = requests.patch(
            endpoint,
            headers=_headers(),
            json=payload,
            timeout=settings.ae_api_timeout,
        )
        response.raise_for_status()
    except Exception as e:
        logger.warning("_patch_entry %s failed: %s", entry_id, e)
        return f"Error updating entry {entry_id}: {e}"
    return "ok"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool: ae_journal_entry_update
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def journal_entry_update(
    entry_id: str,
    title: str = "",
    content: str = "",
    summary: str = "",
    tags: str = "",
    enable: bool | None = None,
) -> str:
    """Change selected fields of an existing journal entry.

    Empty-string arguments (and ``enable=None``) mean "leave unchanged".
    A configuration problem reported by ``_check_config()`` is returned
    as-is; otherwise the blocking update runs on a worker thread.
    """
    config_error = _check_config()
    if not config_error:
        return await asyncio.to_thread(
            _sync_journal_entry_update,
            entry_id,
            title,
            content,
            summary,
            tags,
            enable,
        )
    return config_error
|
||||
|
||||
|
||||
def _sync_journal_entry_update(
|
||||
entry_id: str,
|
||||
title: str,
|
||||
content: str,
|
||||
summary: str,
|
||||
tags: str,
|
||||
enable: bool | None,
|
||||
) -> str:
|
||||
payload: dict = {}
|
||||
if title:
|
||||
payload["name"] = title
|
||||
if content:
|
||||
payload["content"] = content
|
||||
if summary:
|
||||
payload["summary"] = summary
|
||||
if tags:
|
||||
payload["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
|
||||
if enable is not None:
|
||||
payload["enable"] = enable
|
||||
|
||||
if not payload:
|
||||
return "Nothing to update — no fields provided."
|
||||
|
||||
result = _patch_entry(entry_id, payload)
|
||||
if result != "ok":
|
||||
return result
|
||||
|
||||
updated = ", ".join(payload.keys())
|
||||
return f"Journal entry `{entry_id}` updated. Fields changed: {updated}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool: ae_journal_entry_disable
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def journal_entry_disable(entry_id: str) -> str:
    """Soft-delete a journal entry by setting ``enable=false``.

    Returns the configuration error from ``_check_config()`` if there is one,
    the error string from ``_patch_entry`` if the PATCH fails, and otherwise a
    descriptive confirmation. (``_patch_entry`` signals success with the bare
    sentinel ``"ok"``; that sentinel is translated here so the caller gets the
    same kind of confirmation message the other journal tools produce.)
    """
    err = _check_config()
    if err:
        return err
    result = await asyncio.to_thread(_patch_entry, entry_id, {"enable": False})
    if result != "ok":
        return result
    return f"Journal entry `{entry_id}` disabled (soft-deleted)."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool: ae_journal_entry_append
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def journal_entry_append(entry_id: str, content: str, heading: str = "") -> str:
    """Add a new headed section at the end of a journal entry's content.

    A configuration problem reported by ``_check_config()`` is returned
    as-is; otherwise the blocking read-modify-write runs on a worker thread.
    """
    config_error = _check_config()
    if not config_error:
        return await asyncio.to_thread(
            _sync_journal_entry_append, entry_id, content, heading
        )
    return config_error
|
||||
|
||||
|
||||
def _sync_journal_entry_append(entry_id: str, content: str, heading: str) -> str:
    """Blocking worker for ``journal_entry_append``.

    Fetches the entry, appends a ``### <heading>`` section (the heading
    defaults to the current UTC timestamp) and PATCHes the full content back.
    Returns a confirmation string, or the error string from ``_get_entry`` /
    ``_patch_entry`` on failure.
    """
    from datetime import datetime, timezone

    entry = _get_entry(entry_id)
    if isinstance(entry, str):
        # _get_entry reports failures as a plain error string.
        return entry

    existing = (entry.get("content") or "").rstrip()
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    section_heading = heading or ts
    section = f"### {section_heading}\n{content.strip()}"
    # Only insert the blank-line separator when there is content to separate
    # from; an empty entry should not start with stray newlines.
    new_content = f"{existing}\n\n{section}" if existing else section

    result = _patch_entry(entry_id, {"content": new_content})
    if result != "ok":
        return result
    return f"Appended to journal entry `{entry_id}` under heading \"{section_heading}\"."
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool: ae_journal_entry_prepend
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def journal_entry_prepend(entry_id: str, content: str, heading: str = "") -> str:
    """Add a new headed section at the start of a journal entry's content.

    A configuration problem reported by ``_check_config()`` is returned
    as-is; otherwise the blocking read-modify-write runs on a worker thread.
    """
    config_error = _check_config()
    if not config_error:
        return await asyncio.to_thread(
            _sync_journal_entry_prepend, entry_id, content, heading
        )
    return config_error
|
||||
|
||||
|
||||
def _sync_journal_entry_prepend(entry_id: str, content: str, heading: str) -> str:
    """Blocking worker for ``journal_entry_prepend``.

    Fetches the entry, puts a ``### <heading>`` section (the heading defaults
    to the current UTC timestamp) in front of the existing content and PATCHes
    the full content back. Returns a confirmation string, or the error string
    from ``_get_entry`` / ``_patch_entry`` on failure.
    """
    from datetime import datetime, timezone

    entry = _get_entry(entry_id)
    if isinstance(entry, str):
        # _get_entry reports failures as a plain error string.
        return entry

    existing = (entry.get("content") or "").lstrip()
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    section_heading = heading or ts
    section = f"### {section_heading}\n{content.strip()}"
    # Only add the blank-line separator when something follows it; an empty
    # entry should not end with stray trailing newlines.
    new_content = f"{section}\n\n{existing}" if existing else section

    result = _patch_entry(entry_id, {"content": new_content})
    if result != "ok":
        return result
    return f"Prepended to journal entry `{entry_id}` under heading \"{section_heading}\"."
|
||||
|
||||
@@ -6,6 +6,7 @@ These tools affect the host system directly. Use with care.
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -42,3 +43,43 @@ async def claude_allow_dir(path: str, mode: str = "rw") -> str:
|
||||
except Exception as e:
|
||||
logger.error("claude_allow_dir error: %s", e)
|
||||
return f"Error: {e}"
|
||||
|
||||
|
||||
async def shell_exec(command: str, working_dir: str | None = None, timeout: int = 30) -> str:
|
||||
"""Execute a shell command on the Cortex host and return combined stdout/stderr."""
|
||||
timeout = min(max(timeout, 1), 120)
|
||||
|
||||
cwd = None
|
||||
if working_dir:
|
||||
cwd = os.path.expanduser(working_dir)
|
||||
if not os.path.isdir(cwd):
|
||||
return f"Error: working_dir '{working_dir}' does not exist or is not a directory"
|
||||
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_shell(
|
||||
command,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
cwd=cwd,
|
||||
)
|
||||
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
|
||||
|
||||
out = stdout.decode(errors="replace").strip()
|
||||
err = stderr.decode(errors="replace").strip()
|
||||
|
||||
parts = []
|
||||
if out:
|
||||
parts.append(out)
|
||||
if err:
|
||||
parts.append(f"[stderr]\n{err}")
|
||||
combined = "\n".join(parts) if parts else "(no output)"
|
||||
|
||||
if proc.returncode != 0:
|
||||
return f"Exit {proc.returncode}:\n{combined}"
|
||||
return combined
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
return f"Error: command timed out after {timeout}s"
|
||||
except Exception as e:
|
||||
logger.error("shell_exec error: %s", e)
|
||||
return f"Error: {e}"
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
# Architecture: LLM Backends
|
||||
|
||||
> How Cortex selects and talks to AI models.
|
||||
> Last updated: 2026-04-06
|
||||
> Last updated: 2026-04-27 (V2 schema)
|
||||
|
||||
---
|
||||
|
||||
## Backends
|
||||
## Providers
|
||||
|
||||
| Backend | Type | Auth | Notes |
|
||||
|---|---|---|---|
|
||||
| **Claude CLI** | `claude_cli` | OAuth token from `~/.claude/.credentials.json` | Primary chat; model set via `DEFAULT_MODEL` in `.env` |
|
||||
| **Gemini CLI** | `gemini_cli` | Gemini CLI credentials | Fallback / explicit selection |
|
||||
| **Gemini API** | `gemini_api` | `GEMINI_API_KEY` in `.env` | Orchestrator tool loop only — not general chat |
|
||||
| **Local (OpenAI-compat)** | `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, etc. |
|
||||
Cortex supports four model types, each dispatched differently:
|
||||
|
||||
| Type | Auth | Use |
|
||||
|---|---|---|
|
||||
| `claude_cli` | OAuth token from `~/.claude/.credentials.json` | Chat, persona responses |
|
||||
| `gemini_cli` | Gemini CLI credentials | Chat fallback / explicit selection |
|
||||
| `gemini_api` | API key from registry account or `.env` | Orchestrator tool loop |
|
||||
| `local_openai` | API key per host in model registry | Open WebUI, Ollama, OpenRouter, LiteLLM, etc. |
|
||||
|
||||
---
|
||||
|
||||
@@ -26,93 +28,129 @@ request's **role** in the user's model registry. Roles: `chat`, `orchestrator`,
|
||||
|
||||
Resolution order for a role:
|
||||
1. User registry: `roles[role].primary → backup_1 → backup_2 → backup_3 → backup_4`
|
||||
2. `.env` role default: `ROLE_CHAT=claude_cli`, `ROLE_DISTILL=gemini_api`, etc.
|
||||
2. `.env` role default: `ROLE_CHAT=claude_cli`, `ROLE_DISTILL=claude_cli`, etc.
|
||||
3. Hardcoded last-resort: `chat/distill/coder → claude_cli`, `orchestrator/research → gemini_api`
|
||||
|
||||
### Explicit Override
|
||||
|
||||
The UI backend toggle cycles: **auto → claude → gemini → local → auto**
|
||||
|
||||
- **auto** (default): role-based routing as above; sends `model: null` to `/chat`
|
||||
- **claude / gemini / local**: bypasses role routing; forces that specific backend
|
||||
- When "local" is active, the configured model name appears below the toggle button
|
||||
- **auto** (default): role-based routing as above
|
||||
- **claude / gemini / local**: bypasses role routing; forces that backend type
|
||||
- The toggle will be redesigned in Phase 3 to cycle through chat role slots (Primary / Backup 1 / Backup 2)
|
||||
|
||||
**Fallback chain** (automatic, on any error):
|
||||
**Fallback chain** (automatic, only when no explicit registry entry exists):
|
||||
```
|
||||
claude → gemini
|
||||
gemini → claude
|
||||
local → claude
|
||||
```
|
||||
When a model is explicitly configured in the registry, errors surface immediately — no silent fallback.
|
||||
|
||||
Each response includes a model label (bottom-right of the message bubble) showing what
|
||||
actually responded. Amber label with `⚡` = fallback was used.
|
||||
|
||||
Auth expiry on Claude triggers a UI banner + `claude_auth_expired` SSE event.
|
||||
Each response shows a model tag (bottom-right of the message bubble) with the model label and host.
|
||||
|
||||
---
|
||||
|
||||
## Model Registry
|
||||
## Model Registry — V2 Schema
|
||||
|
||||
Per-user configuration stored in `home/{user}/model_registry.json`.
|
||||
|
||||
Hosts and models are managed at **Settings → Model Registry** (`/settings/local`).
|
||||
|
||||
### Schema
|
||||
Managed at **Settings → Models** (`/settings/models`). The cloud provider UI (Anthropic info, Google account add/remove) shipped in Phase 2 — see `DESIGN__Model_Registry_V2.md`.
|
||||
|
||||
```json
|
||||
{
|
||||
"version": 1,
|
||||
"version": 2,
|
||||
|
||||
"providers": {
|
||||
"anthropic": {
|
||||
"credentials": [
|
||||
{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}
|
||||
]
|
||||
},
|
||||
"google": {
|
||||
"accounts": [
|
||||
{"id": "a1b2", "label": "One Sky IT", "api_key": "AIza..."}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"hosts": [
|
||||
{
|
||||
"id": "abc123",
|
||||
"label": "Home ML Laptop",
|
||||
"label": "Gaming Laptop",
|
||||
"api_url": "http://192.168.x.x:3000",
|
||||
"api_key": "sk-...",
|
||||
"api_key": "",
|
||||
"host_type": "openwebui"
|
||||
}
|
||||
],
|
||||
|
||||
"models": [
|
||||
{
|
||||
"id": "def456",
|
||||
"id": "m1",
|
||||
"type": "claude_cli",
|
||||
"label": "Sonnet 4.6 (CLI)",
|
||||
"model_name": "claude-sonnet-4-6",
|
||||
"provider": "anthropic",
|
||||
"credential_id": "cli",
|
||||
"context_k": 200,
|
||||
"tags": ["chat", "persona"]
|
||||
},
|
||||
{
|
||||
"id": "m2",
|
||||
"type": "gemini_api",
|
||||
"label": "Gemini 2.5 Flash (OSIT)",
|
||||
"model_name": "gemini-2.5-flash",
|
||||
"provider": "google",
|
||||
"account_id": "a1b2",
|
||||
"context_k": 1000,
|
||||
"tags": ["orchestrator", "research"]
|
||||
},
|
||||
{
|
||||
"id": "m3",
|
||||
"type": "local_openai",
|
||||
"label": "Gemma Medium",
|
||||
"model_name": "agent-support-gemma-medium",
|
||||
"label": "Gemma 4 E4B",
|
||||
"model_name": "gemma4:e4b",
|
||||
"provider": "local",
|
||||
"host_id": "abc123",
|
||||
"context_k": 50,
|
||||
"tags": ["chat", "fast"]
|
||||
"context_k": 72,
|
||||
"tags": ["fast", "local"]
|
||||
}
|
||||
],
|
||||
|
||||
"roles": {
|
||||
"chat": {
|
||||
"primary": "def456",
|
||||
"backup_1": "claude_cli"
|
||||
}
|
||||
"chat": {"primary": "m1", "backup_1": "m2", "backup_2": "m3"},
|
||||
"orchestrator": {"primary": "m2", "backup_1": "m3"},
|
||||
"distill": {"primary": "m1"}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### host_type
|
||||
|
||||
Controls which API path layout is used:
|
||||
### host_type (local hosts)
|
||||
|
||||
| `host_type` | Chat endpoint | Models endpoint | Use for |
|
||||
|---|---|---|---|
|
||||
| `openwebui` (default) | `POST {url}/api/chat/completions` | `GET {url}/api/models` | Open WebUI, Ollama |
|
||||
| `openai` | `POST {url}/chat/completions` | `GET {url}/models` | OpenRouter, LiteLLM, Anthropic-compat |
|
||||
|
||||
Set `api_url` to the base path ending just before `/chat/completions`:
|
||||
Set `api_url` to the base path before `/chat/completions`:
|
||||
- OpenRouter: `https://openrouter.ai/api/v1`
|
||||
- LiteLLM proxy: `http://host:port`
|
||||
|
||||
### Built-in model IDs
|
||||
|
||||
Always resolvable without a registry entry:
|
||||
Always resolvable without a user-created registry entry. Used as role defaults.
|
||||
|
||||
| ID | Backend |
|
||||
|---|---|
|
||||
| `claude_cli` | Claude CLI subprocess |
|
||||
| `gemini_cli` | Gemini CLI subprocess |
|
||||
| `gemini_api` | Gemini API (SDK) — orchestrator only |
|
||||
| ID | Type | Notes |
|
||||
|---|---|---|
|
||||
| `claude_cli` | `claude_cli` | Model from `DEFAULT_MODEL` in `.env` |
|
||||
| `gemini_cli` | `gemini_cli` | Gemini CLI subprocess |
|
||||
| `gemini_api` | `gemini_api` | Model from `ORCHESTRATOR_MODEL` in `.env`; key from `GEMINI_API_KEY` |
|
||||
|
||||
### V1 → V2 migration
|
||||
|
||||
Automatic on first load. Changes:
|
||||
- Adds `providers` section (Anthropic CLI credential + empty Google accounts)
|
||||
- Migrates `gemini_api_key` from `auth.json` → `providers.google.accounts[0]`
|
||||
- All existing hosts, models, and role assignments are preserved
|
||||
|
||||
---
|
||||
|
||||
@@ -122,9 +160,9 @@ Runs `claude --print --no-session-persistence --output-format text` as a subproc
|
||||
|
||||
- System prompt passed via `--system-prompt`
|
||||
- Conversation history formatted as `<conversation>` block
|
||||
- Token read live from `~/.claude/.credentials.json` on every call — never relies on the
|
||||
- Token read live from `~/.claude/.credentials.json` on every call — never uses the
|
||||
env var, which goes stale after `claude auth login`
|
||||
- Model override via `--model` flag when a specific `model_name` is configured in the registry
|
||||
- Model override via `--model` flag when `model_name` is set in the registry entry
|
||||
|
||||
Timeout: `TIMEOUT_CLAUDE=60` seconds (`.env`)
|
||||
|
||||
@@ -136,7 +174,7 @@ Runs `gemini --output-format text --extensions "" -p <prompt>` as a subprocess.
|
||||
|
||||
- `--extensions ""` disables all MCP extensions — prevents child processes keeping pipes open
|
||||
- `start_new_session=True` puts the process in its own group for clean `os.killpg` on timeout
|
||||
- Output is cleaned to strip CLI noise lines (loading messages, retry notices, quota warnings)
|
||||
- Output is cleaned to strip CLI noise (loading messages, retry notices, quota warnings)
|
||||
|
||||
Timeout: `TIMEOUT_GEMINI=120` seconds (`.env`)
|
||||
|
||||
@@ -155,13 +193,30 @@ Timeout: `TIMEOUT_LOCAL=300` seconds (`.env`) — local models may need to load
|
||||
|
||||
---
|
||||
|
||||
## Gemini API (Orchestrator)
|
||||
|
||||
Used by `orchestrator_engine.py` for the ReAct tool loop. Not used for general chat.
|
||||
|
||||
API key resolution order:
|
||||
1. `api_key` embedded in the resolved orchestrator model config (V2 registry with `account_id`)
|
||||
2. `get_user_gemini_key(user)` — reads from `auth.json` (legacy, kept for compat)
|
||||
3. `GEMINI_API_KEY` in `.env` (server default)
|
||||
|
||||
---
|
||||
|
||||
## Distillation
|
||||
|
||||
Memory distillation uses `role="distill"` for mid and long passes. Configure the distill
|
||||
model via the Model Registry → Role Assignments → Distill role.
|
||||
Memory distillation uses `role="distill"`. Configure via Model Registry → Role Assignments.
|
||||
|
||||
`.env` override: `ROLE_DISTILL=claude_cli` (default). Set to any built-in ID or leave blank
|
||||
to fall through to the hardcoded default (`claude_cli`).
|
||||
`.env` override: `ROLE_DISTILL=claude_cli` (default).
|
||||
|
||||
---
|
||||
|
||||
## Future: Phase 3 — Backend Toggle Redesign
|
||||
|
||||
The `claude → gemini → local` toggle will be replaced with a slot toggle that cycles
|
||||
through the chat role's configured models (Primary → Backup 1 → Backup 2), showing
|
||||
the actual model label. See `DESIGN__Model_Registry_V2.md`.
|
||||
|
||||
---
|
||||
|
||||
@@ -170,7 +225,8 @@ to fall through to the hardcoded default (`claude_cli`).
|
||||
| File | Responsibility |
|
||||
|---|---|
|
||||
| `cortex/llm_client.py` | `complete()` — routing, dispatch, fallback |
|
||||
| `cortex/model_registry.py` | Per-user registry CRUD and resolution |
|
||||
| `cortex/model_registry.py` | Per-user registry CRUD and resolution (V2) |
|
||||
| `cortex/routers/local_llm.py` | Settings UI routes + `/api/models/role` AJAX |
|
||||
| `cortex/routers/chat.py` | `_backend_label()`, `fallback_used` flag |
|
||||
| `cortex/routers/orchestrator.py` | Engine selection, Gemini API key resolution |
|
||||
| `cortex/config.py` | `ROLE_*` env defaults, `DEFINED_ROLES`, `PRIMARY_BACKEND` |
|
||||
|
||||
@@ -33,7 +33,7 @@ Single-page app served from `cortex/static/`. All chat happens via `POST /chat`
|
||||
|
||||
**Files panel:** Browse and edit persona markdown files in-browser. Session search at the bottom.
|
||||
|
||||
**Settings:** `/settings` — Gemini API key, Google account, connected status. `/settings/local` — local model hosts and models.
|
||||
**Settings:** `/settings` — Gemini API key, Google account, connected status. `/settings/models` — model registry (providers, hosts, models, roles).
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Architecture: Planned Features
|
||||
|
||||
> What's next and how it's designed to work.
|
||||
> Last updated: 2026-04-04
|
||||
> Last updated: 2026-04-28
|
||||
|
||||
For the current task list see `TODO__Agents.md`. For phases and priorities see `ROADMAP.md`.
|
||||
|
||||
@@ -9,7 +9,7 @@ For the current task list see `TODO__Agents.md`. For phases and priorities see `
|
||||
|
||||
## 1. Local Orchestrator
|
||||
|
||||
**Status:** High priority — design complete, not yet built.
|
||||
**Status:** Partially built — `openai_orchestrator.py` exists and is wired into `POST /orchestrate`. When the `orchestrator` role in the model registry resolves to a `local_openai` model, requests are routed to the local orchestrator automatically. Full parity with the Gemini orchestrator (tool loop quality, error handling, context budget enforcement) is still in progress.
|
||||
|
||||
Same ReAct tool loop as the Gemini API orchestrator, but driven by a local model via Open WebUI's OpenAI-compatible API. Enables offline/private agent tasks with no API cost.
|
||||
|
||||
@@ -124,7 +124,7 @@ AE Journals becomes the searchable long-term knowledge base. Complements memory
|
||||
|
||||
## 5. Intelligent Model Routing
|
||||
|
||||
**Status:** Deferred. Currently user-toggled.
|
||||
**Status:** Partially addressed. Model Registry V2 (2026-04-27) introduced role-based routing — `chat`, `orchestrator`, `distill`, `coder`, `research` roles each have their own primary/backup model chain, and the UI role toggle lets users manually select which role handles a message. Automatic task-characteristic routing (below) is still deferred.
|
||||
|
||||
Route automatically based on task characteristics rather than requiring manual backend selection:
|
||||
|
||||
@@ -183,10 +183,31 @@ The Claude Code system prompt was leaked in early April 2026. Two reimplementati
|
||||
|
||||
**Status:** Deferred.
|
||||
|
||||
Currently running on `scott_lpt` (main laptop). Long-term target: home server (always-on, Docker).
|
||||
Currently running on `scott-lt-i7-rtx` (gaming/agents laptop). Disabled on `scott_lpt` (2026-04-28) — that machine is a dev/editing node only. Long-term target: home server (always-on, Docker).
|
||||
|
||||
`docker-compose.yml` already exists in the project root. Deployment path:
|
||||
1. Copy to home server
|
||||
2. Configure reverse proxy (Nginx, already Docker-hosted)
|
||||
3. Set subdomain `cortex.dgrzone.com` → home server internal IP
|
||||
4. WireGuard required for all access — not internet-exposed
|
||||
|
||||
---
|
||||
|
||||
## 9. Cortex Mesh (Multi-Instance Fleet)
|
||||
|
||||
**Status:** Concept — no design yet.
|
||||
|
||||
Rather than a single Cortex instance, each device in the fleet runs its own instance with its own persona(s), local models, and capabilities. Instances can delegate tasks to each other based on available resources and roles.
|
||||
|
||||
**Use cases:**
|
||||
- `scott_lpt` (edit/dev node) delegates code tasks to `scott-lt-i7-rtx` (GPU/Ollama host)
|
||||
- A background cron on one instance triggers an orchestrated task on another
|
||||
- Each instance has its own "best available" model — mesh routing picks the right node automatically
|
||||
|
||||
**Design questions to resolve:**
|
||||
- Auth between instances (shared JWT secret vs. per-instance API keys)
|
||||
- How instances advertise capabilities (model registry over HTTP? shared Syncthing file?)
|
||||
- Whether `ae_send_message` / the existing inbox system is the right coordination layer or if a dedicated Cortex-to-Cortex protocol is needed
|
||||
- Session continuity — does a conversation that starts on one node stay there, or can it migrate?
|
||||
|
||||
The Syncthing-synced `home/` directory and shared `model_registry.json` already provide a natural foundation — instances share persona memory and context without a central DB.
|
||||
|
||||
199
documentation/DESIGN__Model_Registry_V2.md
Normal file
199
documentation/DESIGN__Model_Registry_V2.md
Normal file
@@ -0,0 +1,199 @@
|
||||
# Model Registry V2 — Design Document
|
||||
|
||||
> Status: Phase 3 in progress
|
||||
> Goal: Unified, provider-agnostic model management with clean role-based routing
|
||||
|
||||
---
|
||||
|
||||
## Problem Statement
|
||||
|
||||
The original system had two classes of models with different treatment:
|
||||
|
||||
| Type | How configured | How selected |
|
||||
|---|---|---|
|
||||
| Claude, Gemini | Hardcoded built-ins (`claude_cli`, `gemini_api`) | Backend toggle string ("claude"/"gemini") |
|
||||
| Local (Ollama, Open WebUI) | Configured via `/settings/local` | Backend toggle string "local" |
|
||||
|
||||
This breaks down when you want multiple Gemini API keys, OpenRouter alongside local models,
|
||||
role assignments spanning all provider types, or a toggle that shows which model is active
|
||||
instead of which service.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
### Core concept: Providers + Credentials + Models + Roles
|
||||
|
||||
```
|
||||
Providers (built-in, fixed set)
|
||||
└─ Anthropic ← catalog of Claude model IDs (code constants)
|
||||
└─ Google ← catalog of Gemini model IDs (code constants)
|
||||
└─ Local Host ← OpenAI-compatible endpoint (user adds these)
|
||||
|
||||
Credentials (user-configured, stored in model_registry.json)
|
||||
└─ Anthropic ← Claude CLI (OAuth, default) — API key support in Phase 4
|
||||
└─ Google ← one or more API keys (one per Google account)
|
||||
└─ Local Host ← api_key stored on the host record
|
||||
|
||||
Model Entries (user-registered)
|
||||
└─ Provider + model ID + credential = one usable model entry
|
||||
|
||||
Role Assignments (unified — any model entry can fill any role)
|
||||
└─ chat: primary → backup_1 → backup_2
|
||||
└─ orchestrator: primary → backup_1
|
||||
└─ distill: primary
|
||||
└─ (etc.)
|
||||
```
|
||||
|
||||
### Catalog design decision
|
||||
|
||||
Catalogs (`ANTHROPIC_CATALOG`, `GOOGLE_CATALOG`) are **Python constants** in
|
||||
`model_registry.py`, not stored in the per-user JSON. Updated with each code deploy.
|
||||
Per-user catalog customisation is deferred to Phase 4.
|
||||
|
||||
### Backend toggle redesign (Phase 3)
|
||||
|
||||
**Before:** cycles service type strings — `auto → claude → gemini → local`
|
||||
|
||||
**After:** cycles through the chat role's configured models by label:
|
||||
```
|
||||
Sonnet 4.6 (CLI) → Gemini 2.5 Flash → Gemma 4 E4B → (wraps)
|
||||
```
|
||||
- Shows the resolved model label on the toggle button
|
||||
- If no chat role models are configured: shows "auto", uses existing role routing
|
||||
- Clicking the toggle skips empty slots automatically
|
||||
- Color: `claude_cli` = default, `gemini_*` = blue, `local_openai` = amber
|
||||
|
||||
UI sends `slot: "primary" | "backup_1" | "backup_2"` (not backend type string).
|
||||
`llm_client.complete()` resolves that slot from the chat role and dispatches by `type`.
|
||||
|
||||
---
|
||||
|
||||
## Data Model (V2 Schema)
|
||||
|
||||
Stored in `home/{user}/model_registry.json`.
|
||||
|
||||
```json
|
||||
{
|
||||
"version": 2,
|
||||
"providers": {
|
||||
"anthropic": {
|
||||
"credentials": [{"id": "cli", "label": "Claude CLI (OAuth)", "type": "cli"}]
|
||||
},
|
||||
"google": {
|
||||
"accounts": [{"id": "a1b2", "label": "One Sky IT", "api_key": "AIza..."}]
|
||||
}
|
||||
},
|
||||
"hosts": [
|
||||
{"id": "h1", "label": "Gaming Laptop", "api_url": "http://...", "api_key": "", "host_type": "openwebui"}
|
||||
],
|
||||
"models": [
|
||||
{"id": "m1", "type": "claude_cli", "label": "Sonnet 4.6 (CLI)", "model_name": "claude-sonnet-4-6", "provider": "anthropic", "credential_id": "cli", "context_k": 1000, "tags": []},
|
||||
{"id": "m2", "type": "gemini_api", "label": "Gemini 2.5 Flash", "model_name": "gemini-2.5-flash", "provider": "google", "account_id": "a1b2", "context_k": 1000, "tags": []},
|
||||
{"id": "m3", "type": "local_openai", "label": "Gemma 4 E4B", "model_name": "gemma4:e4b", "provider": "local", "host_id": "h1", "context_k": 72, "tags": []}
|
||||
],
|
||||
"roles": {
|
||||
"chat": {"primary": "m1", "backup_1": "m2", "backup_2": "m3"},
|
||||
"orchestrator":{"primary": "m2", "backup_1": "m3"},
|
||||
"distill": {"primary": "m1"}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Model types and dispatch
|
||||
|
||||
| `type` | Dispatches via | Notes |
|
||||
|---|---|---|
|
||||
| `claude_cli` | Claude CLI subprocess | `~/.claude/.credentials.json` OAuth |
|
||||
| `gemini_cli` | Gemini CLI subprocess | |
|
||||
| `gemini_api` | Currently: Gemini CLI (gap — see Phase 4) | Should use google-genai SDK |
|
||||
| `local_openai` | HTTP to OpenAI-compatible endpoint | host_type controls path |
|
||||
|
||||
### Built-in model IDs
|
||||
|
||||
Always resolvable without a registry entry (used as `.env` role defaults):
|
||||
`claude_cli`, `gemini_cli`, `gemini_api`
|
||||
|
||||
---
|
||||
|
||||
## Resolution Logic
|
||||
|
||||
`get_model_for_role(username, role)` — walks `primary → backup_1 → backup_2 → backup_3 → backup_4`, returns first resolved model config with credentials merged in. Falls back to `.env` defaults, then hardcoded last-resort.
|
||||
|
||||
`get_model_for_slot(username, role, slot)` — resolves *only* the named slot, no fallback chain. Used by Phase 3 explicit slot selection.
|
||||
|
||||
---
|
||||
|
||||
## Routing Code
|
||||
|
||||
### `llm_client.complete()` (Phase 3 update)
|
||||
|
||||
```
|
||||
slot: str | None → resolve specific slot, no fallback (explicit selection)
|
||||
model: str | None → legacy backend strings, kept for backward compat
|
||||
(neither) → auto: role-based routing with full fallback chain
|
||||
```
|
||||
|
||||
Dispatch table (`type` → backend function):
|
||||
- `claude_cli` → `_claude()`
|
||||
- `gemini_cli` → `_gemini()`
|
||||
- `gemini_api` → `_gemini()` ← **gap: should be `_gemini_api()` (Phase 4)**
|
||||
- `local_openai` → `_local()`
|
||||
|
||||
### `routers/chat.py` (Phase 3 update)
|
||||
|
||||
- `ChatRequest` gets `slot: str | None = None`
|
||||
- `GET /backend` returns `chat_models: [{slot, label, type}]` for the UI toggle
|
||||
- `_stream_chat` resolves model label from slot when `req.slot` is set
|
||||
|
||||
### `app.js` (Phase 3 update)
|
||||
|
||||
- Loads `chat_models` from `GET /backend` on page init
|
||||
- Toggle cycles through `chat_models` by label, sends `slot` in chat payload
|
||||
- Agent mode placeholder: replace the hard-coded "Gemini tool loop" text with "orchestrator"
|
||||
|
||||
---
|
||||
|
||||
## Known Gaps (not yet implemented)
|
||||
|
||||
### Gap A — `gemini_api` dispatch in `llm_client` (Phase 4)
|
||||
`_TYPE_TO_BACKEND` maps `gemini_api → "gemini"` (CLI subprocess). If a user assigns a
|
||||
`gemini_api` type model to the `chat` role, it silently routes to the Gemini CLI instead
|
||||
of the Google genai SDK. Fix: add `_gemini_api()` in `llm_client.py` that calls the SDK
|
||||
directly, matching how `orchestrator_engine.py` does it. Needs API key from resolved config.
|
||||
|
||||
### Gap B — Agent mode placeholder (Phase 3, quick fix)
|
||||
`app.js` lines 257–258 hard-code `"Gemini tool loop"`. The placeholder should say `"orchestrator"` since
|
||||
the orchestrator role can now be a local model.
|
||||
|
||||
---
|
||||
|
||||
## Phases
|
||||
|
||||
### Phase 1 — Data model + routing ✅ 2026-04-27
|
||||
- V2 schema with `providers` section
|
||||
- Auto migration V1→V2 (pulls gemini_api_key from auth.json → Google accounts)
|
||||
- `_resolve_model()` merges account API key for `gemini_api` type
|
||||
- `get_google_api_key()`, `save_cloud_model()`, `save/remove_google_account()`
|
||||
- Orchestrator router uses model-resolved API key
|
||||
|
||||
### Phase 2 — Cloud provider UI ✅ 2026-04-27
|
||||
- `/settings/models` (canonical, `/settings/local` redirects)
|
||||
- Cloud Providers section: Anthropic info + Google account add/remove
|
||||
- Add Model form with provider tabs (Local / Google / Anthropic)
|
||||
- Provider badges on model rows (Anthropic / Google / Local)
|
||||
- Settings page updated: Gemini Key section replaced by Model Registry card
|
||||
|
||||
### Phase 3 — Toggle redesign + routing cleanup 🔄 in progress
|
||||
- `model_registry.get_model_for_slot()` — resolve a specific slot without fallback chain
|
||||
- `llm_client.complete()` — add `slot` parameter
|
||||
- `routers/chat.py` — `ChatRequest.slot`, extend `GET /backend`, slot label in response tag
|
||||
- `app.js` — data-driven toggle cycling model labels; send `slot` not backend string
|
||||
- Fix Gap B: agent mode placeholder
|
||||
|
||||
### Phase 4 — Polish + future providers
|
||||
- Fix Gap A: `gemini_api` dispatch in `llm_client` → direct Google genai SDK for chat
|
||||
- Claude direct API key support (alternative to CLI OAuth)
|
||||
- OpenRouter as a named provider (already works as local host; could be promoted)
|
||||
- Per-role "test" button in role assignments UI
|
||||
- Per-user catalog additions (extend ANTHROPIC_CATALOG / GOOGLE_CATALOG from UI)
|
||||
@@ -1,7 +1,7 @@
|
||||
# Cortex / Inara — Master Index
|
||||
|
||||
> Start here. This document is a map, not a manual.
|
||||
> Last updated: 2026-04-03
|
||||
> Last updated: 2026-04-28
|
||||
|
||||
---
|
||||
|
||||
@@ -23,7 +23,8 @@ Cortex is a self-hosted personal AI platform. It routes messages from any input
|
||||
| Claude backend | ✅ Live | Primary — via Claude Code CLI |
|
||||
| Gemini backend | ✅ Live | Fallback — via Gemini CLI |
|
||||
| Local backend | ✅ Live | Third option — Open WebUI/Ollama on scott_gaming |
|
||||
| Gemini orchestrator | ✅ Live | Tool loop → Claude response, Agent mode in UI |
|
||||
| Gemini orchestrator | ✅ Live | Tool loop → Claude response, ⚡ Tools toggle in UI (27 tools) |
|
||||
| Model registry V2 | ✅ Live | Providers (Anthropic/Google/Local), multi-account Gemini |
|
||||
| Memory distillation | ✅ Live | Short (daily) / Mid (weekly) / Long (monthly) |
|
||||
| Multi-user | ✅ Live | Scott, Holly, Brian — each with own personas |
|
||||
| Session search | ✅ Live | Full-text search across past session logs |
|
||||
|
||||
@@ -27,10 +27,22 @@ Gemini API orchestrator for private/offline tasks.
|
||||
|
||||
## 🟡 Medium Priority
|
||||
|
||||
### [Models] Model Registry V2 — Unified Provider System
|
||||
See `DESIGN__Model_Registry_V2.md` for full design.
|
||||
- [x] **Phase 1** — V2 schema with providers (Anthropic/Google), multi-account Gemini, auto migration, orchestrator uses account API key — 2026-04-27
|
||||
- [x] **Phase 2** — Cloud provider UI: Anthropic + Google sections in `/settings/models`, account management, model entry creation for cloud models — 2026-04-27
|
||||
- [ ] **Phase 3** — Unified roles + toggle redesign: standalone role assignments, chat toggle cycles role slots (Primary/Backup 1/Backup 2) showing model label
|
||||
- [ ] **Phase 4** — Polish: Claude API key, OpenRouter as named provider, catalog sync from API
|
||||
|
||||
### [Intelligence] Knowledge consolidation — Phase 1
|
||||
See `ARCH__Intelligence_Layer.md` for full design.
|
||||
- [x] Tool: `ae_journal_list` — list all journals for the account — 2026-04-28
|
||||
- [x] Tool: `ae_journal_search` — search before creating to avoid duplicates
|
||||
- [x] Tool: `ae_journal_entry_create` — write a new entry with source metadata
|
||||
- [x] Tool: `ae_journal_entry_update` — PATCH any fields on an existing entry — 2026-04-28
|
||||
- [x] Tool: `ae_journal_entry_disable` — soft-delete via enable=false — 2026-04-28
|
||||
- [x] Tool: `ae_journal_entry_append` — read→append timestamped section→write (running logs) — 2026-04-28
|
||||
- [x] Tool: `ae_journal_entry_prepend` — read→prepend timestamped section→write (newest-first logs) — 2026-04-28
|
||||
- [ ] Import script: walk a markdown directory, chunk by H2 section, create entries
|
||||
- [ ] Target: markdown files from `~/DgrZone_Nextcloud/` and `~/OSIT_Nextcloud/`
|
||||
- [ ] Tag strategy: source path, date, topic tags from frontmatter or filename
|
||||
@@ -92,6 +104,32 @@ base accessible to local models. Endpoints documented in `docs/OPEN_WEBUI_API.md
|
||||
|
||||
## ✅ Completed
|
||||
|
||||
### [UI] Input area polish — 2026-04-28
|
||||
- Single cycling S/M/L button replaces 3 separate height buttons (same UX as font size)
|
||||
- S size collapses mode-select to a row (compact); M/L keep vertical column layout
|
||||
- Input height minimum derived from setting so empty textarea reflects selected size
|
||||
- Context & Memory panel and Settings dropdown are mutually exclusive (closeAllPanels fix)
|
||||
- Both panels now use consistent shadow (var(--shadow)) and z-index (200)
|
||||
|
||||
### [Tools] Tools toggle — decoupled from Role/Backend — 2026-04-28
|
||||
- Removed "Agent" mode from the mode selector; replaced with independent ⚡ toggle
|
||||
- `toolsEnabled` persists in localStorage; routes to orchestrator regardless of active mode
|
||||
- Layout: column (M/L) or row (S) driven by `data-size` attribute set by JS
|
||||
- `chat_role` flows from UI → `OrchestrateRequest` → `orchestrator_engine.run(response_role=...)`
|
||||
|
||||
### [Tools] shell_exec tool — 2026-04-28
|
||||
- `shell_exec(command, working_dir, timeout)` in `cortex/tools/system.py`
|
||||
- Runs any shell command on the Cortex host; timeout clamped 1–120s
|
||||
- Use for system diagnostics: `df -h`, `ps aux`, `journalctl`, `free -h`, etc.
|
||||
|
||||
### [Tools] Aether Journals full toolkit — 2026-04-28
|
||||
- `ae_journal_list` — list all journals + ids for the account
|
||||
- `ae_journal_entry_update` — PATCH any fields (title, content, summary, tags, enable)
|
||||
- `ae_journal_entry_disable` — soft-delete via enable=false
|
||||
- `ae_journal_entry_append` — read→append timestamped section→write (running/data logs)
|
||||
- `ae_journal_entry_prepend` — read→prepend timestamped section→write (newest-first)
|
||||
- Shared `_get_entry` / `_patch_entry` helpers; OpenAI JSON Schema auto-derived from Gemini declarations
|
||||
|
||||
### [Local] Per-user multi-model local LLM settings — 2026-04-01
|
||||
- `home/{username}/local_llm.json` — `hosts[]` + `models[]` + `active_model_id` structure
|
||||
- `cortex/user_settings.py` — CRUD functions: save_host, add_model, remove_model, set_active_model, get_active_local_model
|
||||
|
||||
Reference in New Issue
Block a user