From 85792a7bcffb5fcc648335b1f68d1e692e62bfde Mon Sep 17 00:00:00 2001
From: Scott Idem <stidem@gmail.com>
Date: Sat, 9 May 2026 16:12:03 -0400
Subject: [PATCH] feat: per-role inject_mode, OTR fixes, hover metadata,
 send/stop tooltip

- inject_mode: per-role toggle (parallel to inject_datetime) gates the
  "Current mode: Off The Record" line in the system prompt; wired through
  model_registry, context_loader, chat router, orchestrator router, and
  local_llm settings UI

- OTR orchestrator fix: OrchestrateRequest now carries off_record;
  _finalize_job stores it per message and gates log_turn on it; JS
  orchestrate payload sends off_record correctly

- Per-message hover metadata: removed always-visible .model-tag; replaced
  with .msg-meta strip in the action bar (hover-only); shows model label,
  host, fallback indicator, and OTR badge; stored in session JSON

- Send/stop button tooltip: shows role + model and (when tools on)
  separate orchestrator model + engine label; live elapsed timer on stop
  button via startRunTimer/stopRunTimer

- OrchestratorResult.backend_label: new field; openai_orchestrator fills
  it; _finalize_job propagates it to job dict and session messages

- GET /backend: exposes orchestrator_model label so the frontend tooltip
  can show both models separately

- TODO__Agents.md: added an open item for session delete confirmation
  (documentation only; not implemented in this commit)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 cortex/context_loader.py       |   3 +-
 cortex/model_registry.py       |  12 ++-
 cortex/openai_orchestrator.py  |   1 +
 cortex/orchestrator_engine.py  |   1 +
 cortex/routers/chat.py         |  17 +++-
 cortex/routers/local_llm.py    |  17 +++-
 cortex/routers/orchestrator.py |  34 ++++++--
 cortex/static/app.js           | 143 ++++++++++++++++++++++++++++-----
 cortex/static/local_llm.html   |   8 +-
 cortex/static/style.css        |  36 ++++++---
 documentation/TODO__Agents.md  |   7 ++
 11 files changed, 229 insertions(+), 50 deletions(-)

diff --git a/cortex/context_loader.py b/cortex/context_loader.py
index bf7974e..3fe1e48 100644
--- a/cortex/context_loader.py
+++ b/cortex/context_loader.py
@@ -21,6 +21,7 @@ def load_context(
     include_short: bool = True,
     role_append: str = "",
     inject_datetime: bool = True,
+    inject_mode: bool = True,
     mode: str = "chat",
 ) -> str:
     """
@@ -45,7 +46,7 @@ def load_context(
     if inject_datetime:
         now = datetime.now().astimezone()
         system_lines.append(f"Current date and time: {now.strftime('%A, %Y-%m-%d at %I:%M %p %Z')}")
-    if mode == "otr":
+    if mode == "otr" and inject_mode:
         system_lines.append(
             "Current mode: Off The Record — "
             "this conversation is private and will not be logged or included in memory distillation"
diff --git a/cortex/model_registry.py b/cortex/model_registry.py
index f859796..d2d3e12 100644
--- a/cortex/model_registry.py
+++ b/cortex/model_registry.py
@@ -423,12 +423,13 @@ def set_role_config(
     system_append: str,
     tools: list[str] | None,
     inject_datetime: bool = True,
+    inject_mode: bool = True,
 ) -> None:
-    """Save system_append, tools allow-list, and inject_datetime flag for a role.
+    """Save system_append, tools allow-list, and per-injection flags for a role.
 
     tools=None clears the allow-list (role uses all accessible tools).
-    inject_datetime=False suppresses the current date/time from the system prompt
-    for this role — useful for pure processing roles (summarizer, classifier, etc.).
+    inject_datetime=False suppresses the date/time header for pure processing roles.
+    inject_mode=False suppresses the session mode (OTR) line for pure processing roles.
     """
     data = _load(username)
     roles = data.setdefault("roles", {})
@@ -436,6 +437,7 @@ def set_role_config(
         roles[role] = {}
     roles[role]["system_append"] = system_append.strip()
     roles[role]["inject_datetime"] = inject_datetime
+    roles[role]["inject_mode"] = inject_mode
     if tools is None:
         roles[role].pop("tools", None)
     else:
@@ -445,12 +447,13 @@ def set_role_config(
 
 def get_role_config(username: str, role: str) -> dict:
     """
-    Return supplemental config for a role: system_append, tools, and inject_datetime.
+    Return supplemental config for a role: system_append, tools, and injection flags.
 
     All keys are optional in the registry — missing means "use defaults":
       system_append: str   — appended to the system prompt for this role
       tools: list[str] | None — explicit tool allow-list (None = no restriction)
       inject_datetime: bool — whether to inject current date/time (default True)
+      inject_mode: bool — whether to inject session mode (OTR) line (default True)
     """
     registry = _load(username)
     role_cfg = registry.get("roles", {}).get(role, {})
@@ -458,6 +461,7 @@ def get_role_config(username: str, role: str) -> dict:
         "system_append":  role_cfg.get("system_append", ""),
         "tools":          role_cfg.get("tools") or None,
         "inject_datetime": role_cfg.get("inject_datetime", True),
+        "inject_mode":    role_cfg.get("inject_mode", True),
     }
 
 
diff --git a/cortex/openai_orchestrator.py b/cortex/openai_orchestrator.py
index cc61aff..d8aba00 100644
--- a/cortex/openai_orchestrator.py
+++ b/cortex/openai_orchestrator.py
@@ -119,6 +119,7 @@ async def run(
         response=final_response,
         tool_calls=tool_call_log,
         backend="local",
+        backend_label=model_label,
         gemini_summary=final_response,
     )
 
diff --git a/cortex/orchestrator_engine.py b/cortex/orchestrator_engine.py
index 718a920..9a93526 100644
--- a/cortex/orchestrator_engine.py
+++ b/cortex/orchestrator_engine.py
@@ -99,6 +99,7 @@ class OrchestratorResult:
     response: str                       # final user-facing response (from Claude)
     tool_calls: list[dict] = field(default_factory=list)  # [{tool, args, result}]
     backend: str = "claude"             # model that produced the final response
+    backend_label: str = ""             # human-readable model label for display
     gemini_summary: str = ""            # what Gemini handed to Claude (debug/display)
     checkpoint: OrchestrateCheckpoint | None = None  # set when awaiting confirmation
 
diff --git a/cortex/routers/chat.py b/cortex/routers/chat.py
index 39ef95e..da8e920 100644
--- a/cortex/routers/chat.py
+++ b/cortex/routers/chat.py
@@ -14,6 +14,7 @@ from persona import set_context, validate as validate_persona
 from auth_utils import COOKIE_NAME, decode_token
 import model_registry
 import event_bus
+from model_registry import get_role_config
 
 
 router = APIRouter()
@@ -90,15 +91,18 @@ async def _stream_chat(req: ChatRequest):
     session_id = req.session_id or generate_session_id()
     tier = req.tier or settings.default_tier
 
+    role_cfg = get_role_config(user, req.chat_role)
     system_prompt = load_context(
         tier,
         include_long=req.include_long,
         include_mid=req.include_mid,
         include_short=req.include_short,
+        inject_datetime=role_cfg.get("inject_datetime", True),
+        inject_mode=role_cfg.get("inject_mode", True),
         mode="otr" if req.off_record else "chat",
     )
     history = load_session(session_id)
-    history.append({"role": "user", "content": req.message})
+    history.append({"role": "user", "content": req.message, "off_record": req.off_record})
 
     task = asyncio.create_task(complete(
         system_prompt=system_prompt,
@@ -128,6 +132,7 @@ async def _stream_chat(req: ChatRequest):
                 "backend": actual_backend,
                 "backend_label": backend_label,
                 "host": host,
+                "off_record": req.off_record,
             })
             save_session(session_id, history)
             if not req.off_record:
@@ -228,8 +233,16 @@ async def get_backend(request: Request) -> dict:
     username        = _request_user(request)
     available_roles = _available_roles_for_toggle(username) if username else []
     p = settings.primary_backend
+
+    orch_label = None
+    if username:
+        orch_cfg = model_registry.get_model_for_role(username, "orchestrator")
+        if orch_cfg:
+            orch_label = orch_cfg.get("label") or orch_cfg.get("model_name") or None
+
     return {
-        "available_roles": available_roles,
+        "available_roles":   available_roles,
+        "orchestrator_model": orch_label,
         # Legacy fields kept for backward compat
         "primary":     p,
         "fallback":    _BACKEND_FALLBACK.get(p, "claude"),
diff --git a/cortex/routers/local_llm.py b/cortex/routers/local_llm.py
index 16e5652..5d63538 100644
--- a/cortex/routers/local_llm.py
+++ b/cortex/routers/local_llm.py
@@ -323,8 +323,12 @@ def _render(username: str, success: str = "", error: str = "") -> str:
             f'<input type="checkbox" class="rcp-datetime-cb" data-role="{role}" checked>'
             f' Inject current date &amp; time into system prompt'
             f'</label>'
+            f'<label class="rcp-check" style="margin-top:0.4rem">'
+            f'<input type="checkbox" class="rcp-mode-cb" data-role="{role}" checked>'
+            f' Inject session mode (Chat / Off The Record) into system prompt'
+            f'</label>'
             f'<span class="rcp-hint" style="display:block;margin-top:0.2rem">'
-            f'Disable for pure processing roles (summarizer, classifier, translator)</span>'
+            f'Disable both for pure processing roles (summarizer, classifier, translator)</span>'
             f'</div>'
             f'<div class="rcp-field">'
             f'<label class="rcp-label">Tool allow-list '
@@ -348,6 +352,7 @@ def _render(username: str, success: str = "", error: str = "") -> str:
             "system_append":  roles.get(role, {}).get("system_append", ""),
             "tools":          roles.get(role, {}).get("tools") or None,
             "inject_datetime": roles.get(role, {}).get("inject_datetime", True),
+            "inject_mode":    roles.get(role, {}).get("inject_mode", True),
         }
         for role in app_settings.get_defined_roles()
     })
@@ -607,15 +612,19 @@ async def set_role_config(request: Request) -> JSONResponse:
     system_append    = body.get("system_append", "")
     tools            = body.get("tools")          # list[str] or None
     inject_datetime  = body.get("inject_datetime", True)
+    inject_mode      = body.get("inject_mode", True)
 
     if not role:
         return JSONResponse({"error": "role is required"}, status_code=400)
     if tools is not None and not isinstance(tools, list):
         return JSONResponse({"error": "tools must be a list or null"}, status_code=400)
 
-    reg.set_role_config(username, role, system_append, tools, inject_datetime=bool(inject_datetime))
-    logger.info("role config saved: %s %s (tools=%s inject_datetime=%s)",
-                username, role, len(tools) if tools is not None else "all", inject_datetime)
+    reg.set_role_config(username, role, system_append, tools,
+                        inject_datetime=bool(inject_datetime),
+                        inject_mode=bool(inject_mode))
+    logger.info("role config saved: %s %s (tools=%s inject_datetime=%s inject_mode=%s)",
+                username, role, len(tools) if tools is not None else "all",
+                inject_datetime, inject_mode)
     return JSONResponse({"ok": True})
 
 
diff --git a/cortex/routers/orchestrator.py b/cortex/routers/orchestrator.py
index b574ced..0c891f7 100644
--- a/cortex/routers/orchestrator.py
+++ b/cortex/routers/orchestrator.py
@@ -12,6 +12,7 @@ Designed to be triggered from:
 
 import asyncio
 import logging
+import platform
 import uuid
 from datetime import datetime, timezone
 
@@ -57,6 +58,7 @@ class OrchestrateRequest(BaseModel):
     user: str = "scott"
     persona: str = "inara"
     chat_role: str = "chat"             # role used for the final response (decoupled from tool-loop model)
+    off_record: bool = False            # skip session log; inject OTR mode line into system prompt
 
 
 class OrchestrateResponse(BaseModel):
@@ -74,6 +76,8 @@ class JobStatusResponse(BaseModel):
     response: str | None = None
     tool_calls: list[dict] | None = None
     backend: str | None = None
+    backend_label: str | None = None
+    host: str | None = None
     gemini_summary: str | None = None
     error: str | None = None
     pending_confirmation: dict | None = None  # {tools: [{name, args}], message: str}
@@ -109,6 +113,7 @@ async def orchestrate(req: OrchestrateRequest) -> OrchestrateResponse:
         "error": None,
         "pending_confirmation": None,
         "_user": user,
+        "_off_record": req.off_record,
     }
 
     async with _jobs_lock:
@@ -204,6 +209,8 @@ async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None:
             include_short=req.include_short,
             role_append=role_cfg.get("system_append", ""),
             inject_datetime=role_cfg.get("inject_datetime", True),
+            inject_mode=role_cfg.get("inject_mode", True),
+            mode="otr" if req.off_record else "chat",
         )
 
         session_id = req.session_id or generate_session_id()
@@ -270,7 +277,7 @@ async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None:
                         job_id, len(result.checkpoint.pending_tools))
             return
 
-        await _finalize_job(job_id, result, session_id, req.task, history)
+        await _finalize_job(job_id, result, session_id, req.task, history, off_record=req.off_record)
 
     except Exception as e:
         logger.exception("Orchestrator job failed: %s", job_id)
@@ -316,12 +323,13 @@ async def _resume_job(
             return
 
         async with _jobs_lock:
-            session_id = _jobs[job_id].get("session_id") or ""
-            task = _jobs[job_id].get("task", "")
+            session_id  = _jobs[job_id].get("session_id") or ""
+            task        = _jobs[job_id].get("task", "")
+            off_record  = _jobs[job_id].get("_off_record", False)
 
         from session_store import load as load_session
         history = load_session(session_id) if session_id else []
-        await _finalize_job(job_id, result, session_id, task, history)
+        await _finalize_job(job_id, result, session_id, task, history, off_record=off_record)
 
     except Exception as e:
         logger.exception("Orchestrator resume failed: %s", job_id)
@@ -340,6 +348,7 @@ async def _finalize_job(
     session_id: str,
     task: str,
     history: list,
+    off_record: bool = False,
 ) -> None:
     """Save session, log the turn, and mark the job complete."""
     from session_store import save as save_session, generate_session_id
@@ -348,10 +357,19 @@ async def _finalize_job(
     if not session_id:
         session_id = generate_session_id()
 
-    history.append({"role": "user", "content": task})
-    history.append({"role": "assistant", "content": result.response})
+    host = platform.node()
+    history.append({"role": "user", "content": task, "off_record": off_record})
+    history.append({
+        "role": "assistant",
+        "content": result.response,
+        "backend": result.backend,
+        "backend_label": result.backend_label,
+        "host": host,
+        "off_record": off_record,
+    })
     save_session(session_id, history)
-    log_turn(session_id, task, result.response)
+    if not off_record:
+        log_turn(session_id, task, result.response)
 
     now = datetime.now(timezone.utc).isoformat()
     async with _jobs_lock:
@@ -362,6 +380,8 @@ async def _finalize_job(
             "response": result.response,
             "tool_calls": result.tool_calls,
             "backend": result.backend,
+            "backend_label": result.backend_label,
+            "host": host,
             "gemini_summary": result.gemini_summary,
         })
     logger.info("Orchestrator job complete: %s (%d tool calls)", job_id, len(result.tool_calls))
diff --git a/cortex/static/app.js b/cortex/static/app.js
index a645a59..4d7e755 100644
--- a/cortex/static/app.js
+++ b/cortex/static/app.js
@@ -279,6 +279,7 @@
                 ? { icon: 'zap', label: 'Run' }
                 : sd;
             sendBtn.innerHTML = icon_html(effectiveSd.icon) + ' ' + effectiveSd.label;
+            updateSendBtnTitle();
 
             render_icons();
             updateInputPlaceholder();
@@ -315,6 +316,8 @@
         // When on: submit goes to POST /orchestrate (Gemini tool loop → active role responds).
         // When off: submit goes to POST /chat (direct to active role, no tools).
         let toolsEnabled = localStorage.getItem('tools-enabled') === 'true';
+        let _runStart = 0;
+        let _runTimer = null;
 
         function updateToolsToggleUI() {
             tools_toggle_el.classList.toggle('local-on', toolsEnabled);
@@ -331,6 +334,56 @@
             updateToolsToggleUI();
         });
 
+        function updateSendBtnTitle() {
+            const role    = activeRole();
+            const rmodel  = role?.model_label || '(server default)';
+            const rname   = role?.label || 'Chat';
+            const mode    = current_mode === 'otr' ? 'Off The Record'
+                          : current_mode === 'note' ? 'Note'
+                          : 'Chat';
+            const useOrch = toolsEnabled && current_mode !== 'note';
+
+            let lines;
+            if (useOrch) {
+                const omodel = orchestratorModel || '(server default)';
+                lines = [
+                    `Role: ${rname}  ·  ${rmodel}`,
+                    `Orchestrator: ${omodel}  (tool loop)`,
+                    `Mode: ${mode}`,
+                ];
+            } else {
+                lines = [
+                    `Role: ${rname}  ·  ${rmodel}`,
+                    `Mode: ${mode}`,
+                    `Engine: Direct (no tool loop)`,
+                ];
+            }
+            sendBtn.title = lines.join('\n');
+        }
+
+        function startRunTimer() {
+            _runStart = Date.now();
+            function tick() {
+                const secs  = Math.floor((Date.now() - _runStart) / 1000);
+                const role  = activeRole();
+                const rname = role?.label || 'Chat';
+                const useOrch = toolsEnabled && current_mode !== 'note';
+                const model = useOrch
+                    ? (orchestratorModel || '(server default)') + '  (tool loop)'
+                    : (role?.model_label || '(server default)');
+                stopBtn.title = `Running: ${rname} · ${model}\nElapsed: ${secs}s — click to cancel`;
+            }
+            tick();
+            _runTimer = setInterval(tick, 1000);
+        }
+
+        function stopRunTimer() {
+            clearInterval(_runTimer);
+            _runTimer = null;
+            stopBtn.title = '';
+            updateSendBtnTitle();
+        }
+
         // ── Settings dropdown ─────────────────────────────────────────
         settings_btn_el.addEventListener('click', (e) => {
             e.stopPropagation();
@@ -414,8 +467,9 @@
         const TYPE_CLASS = { claude_cli: '', gemini_api: 'mem-on', gemini_cli: 'mem-on', local_openai: 'local-on' };
         const backendModelHint = document.getElementById('backend-model-hint');
 
-        let availableRoles = [];  // [{role, label, model_label, type}] from /backend
-        let roleIdx        = 0;
+        let availableRoles  = [];  // [{role, label, model_label, type}] from /backend
+        let roleIdx         = 0;
+        let orchestratorModel = null;  // label of the orchestrator-role model
 
         function activeRole() {
             return availableRoles.length > 0 ? availableRoles[roleIdx] : null;
@@ -434,11 +488,13 @@
                 backendModelHint.textContent   = hint;
                 backendModelHint.style.display = hint ? '' : 'none';
             }
+            updateSendBtnTitle();
         }
 
         fetch('/backend').then(r => r.json()).then(d => {
-            availableRoles = d.available_roles || [];
-            roleIdx        = 0;
+            availableRoles    = d.available_roles || [];
+            orchestratorModel = d.orchestrator_model || null;
+            roleIdx           = 0;
             setRoleToggleUI(availableRoles[0] || null);
             _maybeShowNoBanner(availableRoles);
         });
@@ -686,13 +742,11 @@
                 currentHistory.push({ role, content: msg.content });
                 const msgDiv = addMessage(role, msg.content);
                 attachHistoryControls(msgDiv, i);
-                if (role === 'assistant' && (msg.backend_label || msg.backend)) {
-                    const modelTag = document.createElement('div');
-                    modelTag.className = 'model-tag';
-                    const label = msg.backend_label || msg.backend;
-                    modelTag.textContent = msg.host ? `${label} · ${msg.host}` : label;
-                    msgDiv.appendChild(modelTag);
-                }
+                setMessageMeta(msgDiv, {
+                    label:   (role === 'assistant') ? (msg.backend_label || msg.backend || '') : '',
+                    host:    msg.host || '',
+                    otr:     !!msg.off_record,
+                });
             }
 
             if (!silent) addMessage('system', `Resumed session: ${displayName}`);
@@ -703,6 +757,37 @@
             persist_session();
         }
 
+        // ── Message metadata (hover bar) ─────────────────────────────
+        function setMessageMeta(msgDiv, {label = '', host = '', fallback = false, otr = false} = {}) {
+            const wrapper = msgDiv.closest ? msgDiv.closest('.msg-wrapper') : msgDiv.parentElement;
+            if (!wrapper) return;
+            const actionsDiv = wrapper.querySelector('.msg-actions');
+            if (!actionsDiv) return;
+
+            const existing = actionsDiv.querySelector('.msg-meta');
+            if (existing) existing.remove();
+
+            if (!label && !otr) return;
+
+            const meta = document.createElement('span');
+            meta.className = 'msg-meta';
+
+            if (label) {
+                const modelSpan = document.createElement('span');
+                modelSpan.className = 'msg-meta-model' + (fallback ? ' fallback' : '');
+                modelSpan.textContent = (fallback ? '⚡ ' : '') + label + (host ? ' · ' + host : '');
+                meta.appendChild(modelSpan);
+            }
+            if (otr) {
+                const badge = document.createElement('span');
+                badge.className = 'msg-meta-badge otr';
+                badge.textContent = 'OTR';
+                meta.appendChild(badge);
+            }
+
+            actionsDiv.insertBefore(meta, actionsDiv.firstChild);
+        }
+
         function timeAgo(iso) {
             if (!iso) return '?';
             const mins = Math.floor((Date.now() - new Date(iso)) / 60000);
@@ -1115,15 +1200,12 @@
                             currentHistory.push({ role: 'assistant', content: data.response });
                             attachHistoryControls(thinkingDiv, assistHistIdx);
 
-                            // Model tag — always shown, amber if fallback was used
-                            const modelTag = document.createElement('div');
-                            modelTag.className = 'model-tag' + (data.fallback_used ? ' fallback' : '');
-                            const label = data.backend_label || data.backend || '';
-                            const hostSuffix = data.host ? ` · ${data.host}` : '';
-                            modelTag.textContent = data.fallback_used
-                                ? `⚡ fallback → ${label}${hostSuffix}`
-                                : `${label}${hostSuffix}`;
-                            thinkingDiv.appendChild(modelTag);
+                            setMessageMeta(thinkingDiv, {
+                                label:   data.backend_label || data.backend || '',
+                                host:    data.host || '',
+                                fallback: !!data.fallback_used,
+                                otr:     current_mode === 'otr',
+                            });
                         } else if (data.type === 'error') {
                             throw new Error(data.message);
                         }
@@ -1157,6 +1239,7 @@
                         sendBtn.style.display = 'none';
                         stopBtn.style.display = 'flex';
                         headerEmoji.classList.add('processing');
+                        startRunTimer();
 
                         await _doSend(payload, thinkingDiv);
 
@@ -1164,6 +1247,7 @@
                         headerEmoji.classList.remove('processing');
                         sendBtn.style.display = 'block';
                         stopBtn.style.display = 'none';
+                        stopRunTimer();
                         inputEl.focus();
                     });
                     thinkingDiv.appendChild(retryBtn);
@@ -1182,13 +1266,17 @@
             sendBtn.style.display = 'none';
             stopBtn.style.display = 'flex';
             headerEmoji.classList.add('processing');
+            startRunTimer();
 
             activeController = new AbortController();
 
+            const isOtr = current_mode === 'otr';
+
             const userHistIdx = currentHistory.length;
             currentHistory.push({ role: 'user', content: text });
             const userMsgDiv = addMessage('user', text);
             attachHistoryControls(userMsgDiv, userHistIdx);
+            if (isOtr) setMessageMeta(userMsgDiv, {otr: true});
             scrollToBottom();
 
             const thinkingDiv = addMessage('assistant thinking', '✨ thinking…');
@@ -1200,7 +1288,7 @@
                 include_long: memLong,
                 include_mid: memMid,
                 include_short: memShort,
-                off_record: current_mode === 'otr',
+                off_record: isOtr,
                 chat_role: activeRole()?.role || 'chat',
                 user: CORTEX_USER,
                 persona: CORTEX_PERSONA,
@@ -1212,12 +1300,14 @@
             headerEmoji.classList.remove('processing');
             sendBtn.style.display = 'block';
             stopBtn.style.display = 'none';
+            stopRunTimer();
             inputEl.focus();
         }
 
         // Extracted so the retry button can call it without re-adding the
         // user message to the DOM or currentHistory.
         async function _doOrchestrate(text, thinkingDiv, userMsgDiv) {
+            const submitOtr = current_mode === 'otr';
             try {
                 const res = await fetch('/orchestrate', {
                     method: 'POST',
@@ -1229,6 +1319,7 @@
                         include_long: memLong,
                         include_mid: memMid,
                         include_short: memShort,
+                        off_record: current_mode === 'otr',
                         chat_role: activeRole()?.role || 'chat',
                         user: CORTEX_USER,
                         persona: CORTEX_PERSONA,
@@ -1312,6 +1403,12 @@
                 const assistHistIdx = currentHistory.length;
                 currentHistory.push({ role: 'assistant', content: job.response || '' });
                 attachHistoryControls(thinkingDiv, assistHistIdx);
+                setMessageMeta(thinkingDiv, {
+                    label:   job.backend_label || job.backend || '',
+                    host:    job.host || '',
+                    otr:     submitOtr,
+                });
+                if (submitOtr) setMessageMeta(userMsgDiv, {otr: true});
 
                 renderToolCalls(job.tool_calls, thinkingDiv.parentElement);
 
@@ -1341,6 +1438,7 @@
                         sendBtn.style.display = 'none';
                         stopBtn.style.display = 'flex';
                         headerEmoji.classList.add('processing');
+                        startRunTimer();
 
                         await _doOrchestrate(text, thinkingDiv, userMsgDiv);
 
@@ -1348,6 +1446,7 @@
                         headerEmoji.classList.remove('processing');
                         sendBtn.style.display = 'block';
                         stopBtn.style.display = 'none';
+                        stopRunTimer();
                         inputEl.focus();
                     });
                     thinkingDiv.appendChild(retryBtn);
@@ -1364,6 +1463,7 @@
             sendBtn.style.display = 'none';
             stopBtn.style.display = 'flex';
             headerEmoji.classList.add('processing');
+            startRunTimer();
 
             activeController = new AbortController();
 
@@ -1379,6 +1479,7 @@
             headerEmoji.classList.remove('processing');
             sendBtn.style.display = 'block';
             stopBtn.style.display = 'none';
+            stopRunTimer();
             inputEl.focus();
         }
 
diff --git a/cortex/static/local_llm.html b/cortex/static/local_llm.html
index 36c368c..f553d70 100644
--- a/cortex/static/local_llm.html
+++ b/cortex/static/local_llm.html
@@ -634,6 +634,9 @@
       // Inject datetime checkbox (default true if not set)
       const dtCb = panel.querySelector('.rcp-datetime-cb');
       if (dtCb) dtCb.checked = cfg.inject_datetime !== false;
+      // Inject mode checkbox (default true if not set)
+      const modeCb = panel.querySelector('.rcp-mode-cb');
+      if (modeCb) modeCb.checked = cfg.inject_mode !== false;
       // Build tool checklist
       buildToolChecklist(role, cfg.tools || null);
       panel.classList.add('open');
@@ -674,6 +677,8 @@
         const ta     = panel.querySelector('.rcp-textarea');
         const dtCb   = panel.querySelector('.rcp-datetime-cb');
         const inject_datetime = dtCb ? dtCb.checked : true;
+        const modeCb = panel.querySelector('.rcp-mode-cb');
+        const inject_mode = modeCb ? modeCb.checked : true;
         const checks = [...panel.querySelectorAll('.rcp-tools input[type=checkbox]')];
         const allChecked  = checks.every(c => c.checked);
         const someChecked = checks.some(c  => c.checked);
@@ -684,7 +689,7 @@
           const res  = await fetch('/api/models/role-config', {
             method: 'POST',
             headers: {'Content-Type': 'application/json'},
-            body: JSON.stringify({role, system_append: ta.value, tools, inject_datetime}),
+            body: JSON.stringify({role, system_append: ta.value, tools, inject_datetime, inject_mode}),
           });
           const data = await res.json();
           if (data.ok) {
@@ -693,6 +698,7 @@
             ROLE_CONFIG_DATA[role].system_append = ta.value;
             ROLE_CONFIG_DATA[role].tools = tools;
             ROLE_CONFIG_DATA[role].inject_datetime = inject_datetime;
+            ROLE_CONFIG_DATA[role].inject_mode = inject_mode;
             showToast(`${role} config saved`);
             closeRolePanel(role);
           } else {
diff --git a/cortex/static/style.css b/cortex/static/style.css
index 842bb63..74e2ea9 100644
--- a/cortex/static/style.css
+++ b/cortex/static/style.css
@@ -614,18 +614,34 @@
         .copy-btn:hover  { color: var(--text); border-color: var(--muted); }
         .copy-btn.copied { color: var(--success); border-color: var(--success-dim); }
 
-        /* Model tag — shown at the bottom of every assistant message */
-        .model-tag {
-            display: block;
-            font-size: 0.67rem;
-            color: #475569;
-            margin-top: 0.55rem;
-            padding-top: 0.4rem;
-            border-top: 1px solid #2d3148;
-            text-align: right;
+        /* Message metadata — shown in the hover bar below the bubble */
+        .msg-meta {
+            display: flex;
+            align-items: center;
+            gap: 5px;
+            flex: 1;
+            min-width: 0;
+            font-size: 0.62rem;
+            color: var(--dim);
             letter-spacing: 0.02em;
+            overflow: hidden;
         }
-        .model-tag.fallback { color: #f59e0b; }
+        .msg-meta-model {
+            overflow: hidden;
+            text-overflow: ellipsis;
+            white-space: nowrap;
+        }
+        .msg-meta-model.fallback { color: #f59e0b; }
+        .msg-meta-badge {
+            flex-shrink: 0;
+            padding: 1px 5px;
+            border-radius: 3px;
+            font-size: 0.6rem;
+            font-weight: 600;
+            letter-spacing: 0.04em;
+        }
+        .msg-meta-badge.otr { background: #1e1b4b; color: #818cf8; }
+        [data-theme="light"] .msg-meta-badge.otr { background: #ede9fe; color: #5b21b6; }
 
         /* Retry button — shown in error message bubbles */
         .retry-btn {
diff --git a/documentation/TODO__Agents.md b/documentation/TODO__Agents.md
index 847c252..ad3ce0f 100644
--- a/documentation/TODO__Agents.md
+++ b/documentation/TODO__Agents.md
@@ -116,6 +116,13 @@ Inara reaches out on her own initiative via NC Talk, Google Chat, email, or brow
 - [x] `POST /api/push/test` + `POST /api/push/reminders/check` — on-demand test endpoints
 - [x] `push_utils.py` — fixed `pywebpush` 2.x key deserialisation (use `Vapid.from_pem()` instead of passing PEM string)
 
+### [UX] Session delete confirmation
+The session delete button in the sidebar needs a confirmation step before firing — currently
+it deletes immediately on click with no undo. A simple `confirm()` dialog or an inline
+"Are you sure? [Delete] [Cancel]" reveal would prevent accidental data loss.
+- [ ] Add confirm step to session delete button click handler in `app.js`
+- [ ] Consider: also confirm for message-level delete (Edit/Delete hover controls)
+
 ### [UI] File attachments in chat
 Upload an image or document inline and have it flow into context. Natural workflow
 ("here's this PDF, summarize it"); local backend already supports multimodal via Open WebUI.