From 85792a7bcffb5fcc648335b1f68d1e692e62bfde Mon Sep 17 00:00:00 2001 From: Scott Idem Date: Sat, 9 May 2026 16:12:03 -0400 Subject: [PATCH] feat: per-role inject_mode, OTR fixes, hover metadata, send/stop tooltip - inject_mode: per-role toggle (parallel to inject_datetime) gates the "Current mode: Off The Record" line in the system prompt; wired through model_registry, context_loader, chat router, orchestrator router, and local_llm settings UI - OTR orchestrator fix: OrchestrateRequest now carries off_record; _finalize_job stores it per message and gates log_turn on it; JS orchestrate payload sends off_record correctly - Per-message hover metadata: removed always-visible .model-tag; replaced with .msg-meta strip in the action bar (hover-only); shows model label, host, fallback indicator, and OTR badge; stored in session JSON - Send/stop button tooltip: shows role + model and (when tools on) separate orchestrator model + engine label; live elapsed timer on stop button via startRunTimer/stopRunTimer - OrchestratorResult.backend_label: new field; openai_orchestrator fills it; finalize_job propagates it to job dict and session messages - GET /backend: exposes orchestrator_model label so the frontend tooltip can show both models separately - TODO: session delete confirmation added Co-Authored-By: Claude Sonnet 4.6 --- cortex/context_loader.py | 3 +- cortex/model_registry.py | 12 ++- cortex/openai_orchestrator.py | 1 + cortex/orchestrator_engine.py | 1 + cortex/routers/chat.py | 17 +++- cortex/routers/local_llm.py | 17 +++- cortex/routers/orchestrator.py | 34 ++++++-- cortex/static/app.js | 143 ++++++++++++++++++++++++++++----- cortex/static/local_llm.html | 8 +- cortex/static/style.css | 36 ++++++--- documentation/TODO__Agents.md | 7 ++ 11 files changed, 229 insertions(+), 50 deletions(-) diff --git a/cortex/context_loader.py b/cortex/context_loader.py index bf7974e..3fe1e48 100644 --- a/cortex/context_loader.py +++ b/cortex/context_loader.py @@ -21,6 +21,7 @@ def load_context( include_short: bool = True, role_append: str = "", inject_datetime: bool = True, + inject_mode: bool = True, mode: str = "chat", ) -> str: """ @@ -45,7 +46,7 @@ def load_context( if inject_datetime: now = datetime.now().astimezone() system_lines.append(f"Current date and time: {now.strftime('%A, %Y-%m-%d at %I:%M %p %Z')}") - if mode == "otr": + if mode == "otr" and inject_mode: system_lines.append( "Current mode: Off The Record — " "this conversation is private and will not be logged or included in memory distillation" diff --git a/cortex/model_registry.py b/cortex/model_registry.py index f859796..d2d3e12 100644 --- a/cortex/model_registry.py +++ b/cortex/model_registry.py @@ -423,12 +423,13 @@ def set_role_config( system_append: str, tools: list[str] | None, inject_datetime: bool = True, + inject_mode: bool = True, ) -> None: - """Save system_append, tools allow-list, and inject_datetime flag for a role. + """Save system_append, tools allow-list, and per-injection flags for a role. tools=None clears the allow-list (role uses all accessible tools). - inject_datetime=False suppresses the current date/time from the system prompt - for this role — useful for pure processing roles (summarizer, classifier, etc.). + inject_datetime=False suppresses the date/time header for pure processing roles. + inject_mode=False suppresses the session mode (OTR) line for pure processing roles. """ data = _load(username) roles = data.setdefault("roles", {}) @@ -436,6 +437,7 @@ def set_role_config( roles[role] = {} roles[role]["system_append"] = system_append.strip() roles[role]["inject_datetime"] = inject_datetime + roles[role]["inject_mode"] = inject_mode if tools is None: roles[role].pop("tools", None) else: @@ -445,12 +447,13 @@ def set_role_config( def get_role_config(username: str, role: str) -> dict: """ - Return supplemental config for a role: system_append, tools, and inject_datetime. + Return supplemental config for a role: system_append, tools, and injection flags. All keys are optional in the registry — missing means "use defaults": system_append: str — appended to the system prompt for this role tools: list[str] | None — explicit tool allow-list (None = no restriction) inject_datetime: bool — whether to inject current date/time (default True) + inject_mode: bool — whether to inject session mode (OTR) line (default True) """ registry = _load(username) role_cfg = registry.get("roles", {}).get(role, {}) @@ -458,6 +461,7 @@ def get_role_config(username: str, role: str) -> dict: "system_append": role_cfg.get("system_append", ""), "tools": role_cfg.get("tools") or None, "inject_datetime": role_cfg.get("inject_datetime", True), + "inject_mode": role_cfg.get("inject_mode", True), } diff --git a/cortex/openai_orchestrator.py b/cortex/openai_orchestrator.py index cc61aff..d8aba00 100644 --- a/cortex/openai_orchestrator.py +++ b/cortex/openai_orchestrator.py @@ -119,6 +119,7 @@ async def run( response=final_response, tool_calls=tool_call_log, backend="local", + backend_label=model_label, gemini_summary=final_response, ) diff --git a/cortex/orchestrator_engine.py b/cortex/orchestrator_engine.py index 718a920..9a93526 100644 --- a/cortex/orchestrator_engine.py +++ b/cortex/orchestrator_engine.py @@ -99,6 +99,7 @@ class OrchestratorResult: response: str # final user-facing response (from Claude) tool_calls: list[dict] = field(default_factory=list) # [{tool, args, result}] backend: str = "claude" # model that produced the final response + backend_label: str = "" # human-readable model label for display gemini_summary: str = "" # what Gemini handed to Claude (debug/display) checkpoint: OrchestrateCheckpoint | None = None # set when awaiting confirmation diff --git a/cortex/routers/chat.py b/cortex/routers/chat.py index 39ef95e..da8e920 100644 --- a/cortex/routers/chat.py +++ b/cortex/routers/chat.py @@ -14,6 +14,7 @@ from persona import set_context, validate as validate_persona from auth_utils import COOKIE_NAME, decode_token import model_registry import event_bus +from model_registry import get_role_config router = APIRouter() @@ -90,15 +91,18 @@ async def _stream_chat(req: ChatRequest): session_id = req.session_id or generate_session_id() tier = req.tier or settings.default_tier + role_cfg = get_role_config(user, req.chat_role) system_prompt = load_context( tier, include_long=req.include_long, include_mid=req.include_mid, include_short=req.include_short, + inject_datetime=role_cfg.get("inject_datetime", True), + inject_mode=role_cfg.get("inject_mode", True), mode="otr" if req.off_record else "chat", ) history = load_session(session_id) - history.append({"role": "user", "content": req.message}) + history.append({"role": "user", "content": req.message, "off_record": req.off_record}) task = asyncio.create_task(complete( system_prompt=system_prompt, @@ -128,6 +132,7 @@ async def _stream_chat(req: ChatRequest): "backend": actual_backend, "backend_label": backend_label, "host": host, + "off_record": req.off_record, }) save_session(session_id, history) if not req.off_record: @@ -228,8 +233,16 @@ async def get_backend(request: Request) -> dict: username = _request_user(request) available_roles = _available_roles_for_toggle(username) if username else [] p = settings.primary_backend + + orch_label = None + if username: + orch_cfg = model_registry.get_model_for_role(username, "orchestrator") + if orch_cfg: + orch_label = orch_cfg.get("label") or orch_cfg.get("model_name") or None + return { - "available_roles": available_roles, + "available_roles": available_roles, + "orchestrator_model": orch_label, # Legacy fields kept for backward compat "primary": p, "fallback": _BACKEND_FALLBACK.get(p, "claude"), diff --git a/cortex/routers/local_llm.py b/cortex/routers/local_llm.py index 16e5652..5d63538 100644 --- a/cortex/routers/local_llm.py +++ b/cortex/routers/local_llm.py @@ -323,8 +323,12 @@ def _render(username: str, success: str = "", error: str = "") -> str: f'' f' Inject current date & time into system prompt' f'' + f'' f'' - f'Disable for pure processing roles (summarizer, classifier, translator)' + f'Disable both for pure processing roles (summarizer, classifier, translator)' f'' f'
' f'