feat: per-role inject_mode, OTR fixes, hover metadata, send/stop tooltip

- inject_mode: per-role toggle (parallel to inject_datetime) gates the
  "Current mode: Off The Record" line in the system prompt; wired through
  model_registry, context_loader, chat router, orchestrator router, and
  local_llm settings UI

- OTR orchestrator fix: OrchestrateRequest now carries off_record;
  _finalize_job stores it per message and gates log_turn on it; JS
  orchestrate payload sends off_record correctly

- Per-message hover metadata: removed always-visible .model-tag; replaced
  with .msg-meta strip in the action bar (hover-only); shows model label,
  host, fallback indicator, and OTR badge; stored in session JSON

- Send/stop button tooltip: shows role + model and (when tools on)
  separate orchestrator model + engine label; live elapsed timer on stop
  button via startRunTimer/stopRunTimer

- OrchestratorResult.backend_label: new field; openai_orchestrator fills
  it; _finalize_job propagates it to the job dict and session messages

- GET /backend: exposes orchestrator_model label so the frontend tooltip
  can show both models separately

- TODO: session delete confirmation added

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-05-09 16:12:03 -04:00
parent 0afa135ce9
commit 85792a7bcf
11 changed files with 229 additions and 50 deletions

View File

@@ -14,6 +14,7 @@ from persona import set_context, validate as validate_persona
from auth_utils import COOKIE_NAME, decode_token
import model_registry
import event_bus
from model_registry import get_role_config
router = APIRouter()
@@ -90,15 +91,18 @@ async def _stream_chat(req: ChatRequest):
session_id = req.session_id or generate_session_id()
tier = req.tier or settings.default_tier
role_cfg = get_role_config(user, req.chat_role)
system_prompt = load_context(
tier,
include_long=req.include_long,
include_mid=req.include_mid,
include_short=req.include_short,
inject_datetime=role_cfg.get("inject_datetime", True),
inject_mode=role_cfg.get("inject_mode", True),
mode="otr" if req.off_record else "chat",
)
history = load_session(session_id)
history.append({"role": "user", "content": req.message})
history.append({"role": "user", "content": req.message, "off_record": req.off_record})
task = asyncio.create_task(complete(
system_prompt=system_prompt,
@@ -128,6 +132,7 @@ async def _stream_chat(req: ChatRequest):
"backend": actual_backend,
"backend_label": backend_label,
"host": host,
"off_record": req.off_record,
})
save_session(session_id, history)
if not req.off_record:
@@ -228,8 +233,16 @@ async def get_backend(request: Request) -> dict:
username = _request_user(request)
available_roles = _available_roles_for_toggle(username) if username else []
p = settings.primary_backend
orch_label = None
if username:
orch_cfg = model_registry.get_model_for_role(username, "orchestrator")
if orch_cfg:
orch_label = orch_cfg.get("label") or orch_cfg.get("model_name") or None
return {
"available_roles": available_roles,
"available_roles": available_roles,
"orchestrator_model": orch_label,
# Legacy fields kept for backward compat
"primary": p,
"fallback": _BACKEND_FALLBACK.get(p, "claude"),

View File

@@ -323,8 +323,12 @@ def _render(username: str, success: str = "", error: str = "") -> str:
f'<input type="checkbox" class="rcp-datetime-cb" data-role="{role}" checked>'
f' Inject current date &amp; time into system prompt'
f'</label>'
f'<label class="rcp-check" style="margin-top:0.4rem">'
f'<input type="checkbox" class="rcp-mode-cb" data-role="{role}" checked>'
f' Inject session mode (Chat / Off The Record) into system prompt'
f'</label>'
f'<span class="rcp-hint" style="display:block;margin-top:0.2rem">'
f'Disable for pure processing roles (summarizer, classifier, translator)</span>'
f'Disable both for pure processing roles (summarizer, classifier, translator)</span>'
f'</div>'
f'<div class="rcp-field">'
f'<label class="rcp-label">Tool allow-list '
@@ -348,6 +352,7 @@ def _render(username: str, success: str = "", error: str = "") -> str:
"system_append": roles.get(role, {}).get("system_append", ""),
"tools": roles.get(role, {}).get("tools") or None,
"inject_datetime": roles.get(role, {}).get("inject_datetime", True),
"inject_mode": roles.get(role, {}).get("inject_mode", True),
}
for role in app_settings.get_defined_roles()
})
@@ -607,15 +612,19 @@ async def set_role_config(request: Request) -> JSONResponse:
system_append = body.get("system_append", "")
tools = body.get("tools") # list[str] or None
inject_datetime = body.get("inject_datetime", True)
inject_mode = body.get("inject_mode", True)
if not role:
return JSONResponse({"error": "role is required"}, status_code=400)
if tools is not None and not isinstance(tools, list):
return JSONResponse({"error": "tools must be a list or null"}, status_code=400)
reg.set_role_config(username, role, system_append, tools, inject_datetime=bool(inject_datetime))
logger.info("role config saved: %s %s (tools=%s inject_datetime=%s)",
username, role, len(tools) if tools is not None else "all", inject_datetime)
reg.set_role_config(username, role, system_append, tools,
inject_datetime=bool(inject_datetime),
inject_mode=bool(inject_mode))
logger.info("role config saved: %s %s (tools=%s inject_datetime=%s inject_mode=%s)",
username, role, len(tools) if tools is not None else "all",
inject_datetime, inject_mode)
return JSONResponse({"ok": True})

View File

@@ -12,6 +12,7 @@ Designed to be triggered from:
import asyncio
import logging
import platform
import uuid
from datetime import datetime, timezone
@@ -57,6 +58,7 @@ class OrchestrateRequest(BaseModel):
user: str = "scott"
persona: str = "inara"
chat_role: str = "chat" # role used for the final response (decoupled from tool-loop model)
off_record: bool = False # skip session log; inject OTR mode line into system prompt
class OrchestrateResponse(BaseModel):
@@ -74,6 +76,8 @@ class JobStatusResponse(BaseModel):
response: str | None = None
tool_calls: list[dict] | None = None
backend: str | None = None
backend_label: str | None = None
host: str | None = None
gemini_summary: str | None = None
error: str | None = None
pending_confirmation: dict | None = None # {tools: [{name, args}], message: str}
@@ -109,6 +113,7 @@ async def orchestrate(req: OrchestrateRequest) -> OrchestrateResponse:
"error": None,
"pending_confirmation": None,
"_user": user,
"_off_record": req.off_record,
}
async with _jobs_lock:
@@ -204,6 +209,8 @@ async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None:
include_short=req.include_short,
role_append=role_cfg.get("system_append", ""),
inject_datetime=role_cfg.get("inject_datetime", True),
inject_mode=role_cfg.get("inject_mode", True),
mode="otr" if req.off_record else "chat",
)
session_id = req.session_id or generate_session_id()
@@ -270,7 +277,7 @@ async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None:
job_id, len(result.checkpoint.pending_tools))
return
await _finalize_job(job_id, result, session_id, req.task, history)
await _finalize_job(job_id, result, session_id, req.task, history, off_record=req.off_record)
except Exception as e:
logger.exception("Orchestrator job failed: %s", job_id)
@@ -316,12 +323,13 @@ async def _resume_job(
return
async with _jobs_lock:
session_id = _jobs[job_id].get("session_id") or ""
task = _jobs[job_id].get("task", "")
session_id = _jobs[job_id].get("session_id") or ""
task = _jobs[job_id].get("task", "")
off_record = _jobs[job_id].get("_off_record", False)
from session_store import load as load_session
history = load_session(session_id) if session_id else []
await _finalize_job(job_id, result, session_id, task, history)
await _finalize_job(job_id, result, session_id, task, history, off_record=off_record)
except Exception as e:
logger.exception("Orchestrator resume failed: %s", job_id)
@@ -340,6 +348,7 @@ async def _finalize_job(
session_id: str,
task: str,
history: list,
off_record: bool = False,
) -> None:
"""Save session, log the turn, and mark the job complete."""
from session_store import save as save_session, generate_session_id
@@ -348,10 +357,19 @@ async def _finalize_job(
if not session_id:
session_id = generate_session_id()
history.append({"role": "user", "content": task})
history.append({"role": "assistant", "content": result.response})
host = platform.node()
history.append({"role": "user", "content": task, "off_record": off_record})
history.append({
"role": "assistant",
"content": result.response,
"backend": result.backend,
"backend_label": result.backend_label,
"host": host,
"off_record": off_record,
})
save_session(session_id, history)
log_turn(session_id, task, result.response)
if not off_record:
log_turn(session_id, task, result.response)
now = datetime.now(timezone.utc).isoformat()
async with _jobs_lock:
@@ -362,6 +380,8 @@ async def _finalize_job(
"response": result.response,
"tool_calls": result.tool_calls,
"backend": result.backend,
"backend_label": result.backend_label,
"host": host,
"gemini_summary": result.gemini_summary,
})
logger.info("Orchestrator job complete: %s (%d tool calls)", job_id, len(result.tool_calls))