feat: proper confirmation-resume flow + per-user tool policy
Fixes the broken confirmation gate where users had no way to approve
or deny a blocked tool call in the web UI.
Changes:
- orchestrator_engine.py: add OrchestrateCheckpoint dataclass, extract
loop into _run_from_contents(), add resume() function
- openai_orchestrator.py: same treatment — _run_from_messages(), resume()
- routers/orchestrator.py: POST /{job_id}/confirm and /deny endpoints,
separate _checkpoints store, _resume_job() + _finalize_job() helpers,
"awaiting_confirmation" job status with pending_confirmation payload
- auth_utils.py: get_tool_policy() and save_tool_policy() helpers reading
home/{user}/tool_policy.json (allow/deny lists)
- routers/orchestrator.py: loads tool_policy per user and passes
confirm_allow/confirm_deny to both engines
- app.js: poll loop handles awaiting_confirmation — shows Confirm/Deny
buttons inline, resumes polling after user action
- settings.html + settings.py: Tool Permissions section with allow/deny
textareas, POST /settings/tool-policy route
- style.css: .confirm-gate, .confirm-btn, .deny-btn styles
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -44,12 +44,39 @@ Keep your summary factual and complete. Include relevant URLs, data, and specifi
|
||||
If no tools are needed, return an empty summary."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class OrchestrateCheckpoint:
|
||||
"""Saved execution state for a job paused at a confirmation gate."""
|
||||
engine: str # "gemini" | "openai"
|
||||
pre_fn_state: list # conversation state before function responses
|
||||
executed_results: list[dict] # tools that already ran this round
|
||||
pending_tools: list[dict] # [{name, args}] awaiting confirmation
|
||||
tool_call_log: list[dict] # all tool calls so far
|
||||
task: str
|
||||
# Gemini-specific config (unused by openai engine)
|
||||
system_prompt: str = ""
|
||||
session_messages: list | None = None
|
||||
model_name: str | None = None
|
||||
gemini_api_key: str | None = None
|
||||
respond_with_claude: bool = True
|
||||
response_role: str = "chat"
|
||||
# OpenAI-specific config (unused by gemini engine)
|
||||
model_cfg: dict | None = None
|
||||
respond_with_final: bool = True
|
||||
# Common
|
||||
user_role: str = "user"
|
||||
confirm_allow: frozenset = field(default_factory=frozenset)
|
||||
confirm_deny: frozenset = field(default_factory=frozenset)
|
||||
rounds_used: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class OrchestratorResult:
|
||||
response: str # final user-facing response (from Claude)
|
||||
tool_calls: list[dict] = field(default_factory=list) # [{tool, args, result}]
|
||||
backend: str = "claude" # model that produced the final response
|
||||
gemini_summary: str = "" # what Gemini handed to Claude (debug/display)
|
||||
checkpoint: OrchestrateCheckpoint | None = None # set when awaiting confirmation
|
||||
|
||||
|
||||
async def run(
|
||||
@@ -61,6 +88,8 @@ async def run(
|
||||
model_name: str | None = None,
|
||||
response_role: str = "chat",
|
||||
user_role: str = "user",
|
||||
confirm_allow: set[str] | None = None,
|
||||
confirm_deny: set[str] | None = None,
|
||||
) -> OrchestratorResult:
|
||||
"""
|
||||
Run the full orchestration loop for a task.
|
||||
@@ -72,9 +101,11 @@ async def run(
|
||||
respond_with_claude: If False, return Gemini's summary as the response (useful for
|
||||
background/cron tasks where a polished reply isn't needed)
|
||||
gemini_api_key: Per-user Gemini API key (falls back to GEMINI_API_KEY in .env)
|
||||
confirm_allow: Tools to bypass the confirmation gate for this user
|
||||
confirm_deny: Tools to always block for this user
|
||||
|
||||
Returns:
|
||||
OrchestratorResult with response, tool call log, backend used, and Gemini summary
|
||||
OrchestratorResult — if checkpoint is set, the job is awaiting confirmation
|
||||
"""
|
||||
api_key = gemini_api_key or settings.gemini_api_key
|
||||
if not api_key:
|
||||
@@ -85,19 +116,157 @@ async def run(
|
||||
|
||||
client = genai.Client(api_key=api_key)
|
||||
|
||||
# Seed Gemini with the task — include recent session context if available
|
||||
_confirm_allow = frozenset(confirm_allow or ())
|
||||
_confirm_deny = frozenset(confirm_deny or ())
|
||||
effective_confirm = (CONFIRM_REQUIRED - set(_confirm_allow)) | set(_confirm_deny)
|
||||
|
||||
task_with_context = _build_task_prompt(task, session_messages)
|
||||
contents: list[types.Content] = [
|
||||
types.Content(role="user", parts=[types.Part(text=task_with_context)])
|
||||
]
|
||||
|
||||
tool_declarations, tool_callables = get_tools_for_role(user_role)
|
||||
|
||||
tool_call_log: list[dict] = []
|
||||
|
||||
gemini_summary, checkpoint = await _run_from_contents(
|
||||
client=client,
|
||||
contents=contents,
|
||||
tool_declarations=tool_declarations,
|
||||
tool_callables=tool_callables,
|
||||
tool_call_log=tool_call_log,
|
||||
effective_confirm=effective_confirm,
|
||||
model_name=model_name,
|
||||
task=task,
|
||||
system_prompt=system_prompt,
|
||||
session_messages=session_messages,
|
||||
respond_with_claude=respond_with_claude,
|
||||
response_role=response_role,
|
||||
user_role=user_role,
|
||||
confirm_allow=_confirm_allow,
|
||||
confirm_deny=_confirm_deny,
|
||||
starting_round=0,
|
||||
gemini_api_key=api_key,
|
||||
)
|
||||
|
||||
if checkpoint:
|
||||
return OrchestratorResult(
|
||||
response=gemini_summary,
|
||||
tool_calls=list(tool_call_log),
|
||||
backend="gemini",
|
||||
gemini_summary=gemini_summary,
|
||||
checkpoint=checkpoint,
|
||||
)
|
||||
|
||||
return await _claude_handoff(
|
||||
task=task,
|
||||
tool_call_log=tool_call_log,
|
||||
gemini_summary=gemini_summary,
|
||||
system_prompt=system_prompt,
|
||||
session_messages=session_messages,
|
||||
respond_with_claude=respond_with_claude,
|
||||
response_role=response_role,
|
||||
)
|
||||
|
||||
|
||||
async def resume(checkpoint: OrchestrateCheckpoint, confirmed: bool) -> OrchestratorResult:
|
||||
"""Continue a job that was paused at a confirmation gate."""
|
||||
api_key = checkpoint.gemini_api_key or settings.gemini_api_key
|
||||
client = genai.Client(api_key=api_key)
|
||||
tool_declarations, tool_callables = get_tools_for_role(checkpoint.user_role)
|
||||
|
||||
effective_confirm = (CONFIRM_REQUIRED - set(checkpoint.confirm_allow)) | set(checkpoint.confirm_deny)
|
||||
|
||||
# Rebuild from saved state — strip "[awaiting confirmation]" placeholders
|
||||
contents = list(checkpoint.pre_fn_state)
|
||||
tool_call_log = [t for t in checkpoint.tool_call_log if t["result"] != "[awaiting confirmation]"]
|
||||
|
||||
# Build function responses for this round
|
||||
response_parts: list[types.Part] = []
|
||||
|
||||
for er in checkpoint.executed_results:
|
||||
response_parts.append(types.Part(function_response=types.FunctionResponse(
|
||||
name=er["name"], response={"result": er["result"]}
|
||||
)))
|
||||
|
||||
for pt in checkpoint.pending_tools:
|
||||
if confirmed:
|
||||
result_str = await _execute_tool(pt["name"], pt["args"], tool_callables)
|
||||
logger.info("Confirmed tool %s → %d chars", pt["name"], len(result_str))
|
||||
else:
|
||||
result_str = "Action denied by user."
|
||||
logger.info("Tool %s denied by user", pt["name"])
|
||||
tool_call_log.append({"tool": pt["name"], "args": pt["args"], "result": result_str})
|
||||
response_parts.append(types.Part(function_response=types.FunctionResponse(
|
||||
name=pt["name"], response={"result": result_str}
|
||||
)))
|
||||
|
||||
contents.append(types.Content(role="user", parts=response_parts))
|
||||
|
||||
gemini_summary, new_checkpoint = await _run_from_contents(
|
||||
client=client,
|
||||
contents=contents,
|
||||
tool_declarations=tool_declarations,
|
||||
tool_callables=tool_callables,
|
||||
tool_call_log=tool_call_log,
|
||||
effective_confirm=effective_confirm,
|
||||
model_name=checkpoint.model_name,
|
||||
task=checkpoint.task,
|
||||
system_prompt=checkpoint.system_prompt,
|
||||
session_messages=checkpoint.session_messages,
|
||||
respond_with_claude=checkpoint.respond_with_claude,
|
||||
response_role=checkpoint.response_role,
|
||||
user_role=checkpoint.user_role,
|
||||
confirm_allow=checkpoint.confirm_allow,
|
||||
confirm_deny=checkpoint.confirm_deny,
|
||||
starting_round=checkpoint.rounds_used,
|
||||
gemini_api_key=api_key,
|
||||
)
|
||||
|
||||
if new_checkpoint:
|
||||
return OrchestratorResult(
|
||||
response=gemini_summary,
|
||||
tool_calls=list(tool_call_log),
|
||||
backend="gemini",
|
||||
gemini_summary=gemini_summary,
|
||||
checkpoint=new_checkpoint,
|
||||
)
|
||||
|
||||
return await _claude_handoff(
|
||||
task=checkpoint.task,
|
||||
tool_call_log=tool_call_log,
|
||||
gemini_summary=gemini_summary,
|
||||
system_prompt=checkpoint.system_prompt,
|
||||
session_messages=checkpoint.session_messages,
|
||||
respond_with_claude=checkpoint.respond_with_claude,
|
||||
response_role=checkpoint.response_role,
|
||||
)
|
||||
|
||||
|
||||
async def _run_from_contents(
|
||||
client,
|
||||
contents: list,
|
||||
tool_declarations: list,
|
||||
tool_callables: dict,
|
||||
tool_call_log: list[dict],
|
||||
effective_confirm: set[str],
|
||||
model_name: str | None,
|
||||
task: str,
|
||||
system_prompt: str,
|
||||
session_messages: list[dict] | None,
|
||||
respond_with_claude: bool,
|
||||
response_role: str,
|
||||
user_role: str,
|
||||
confirm_allow: frozenset,
|
||||
confirm_deny: frozenset,
|
||||
starting_round: int = 0,
|
||||
gemini_api_key: str | None = None,
|
||||
) -> tuple[str, OrchestrateCheckpoint | None]:
|
||||
"""
|
||||
Run the ReAct loop from the current contents state.
|
||||
Returns (gemini_summary, checkpoint) — checkpoint is set if confirmation is needed.
|
||||
"""
|
||||
gemini_summary = ""
|
||||
|
||||
# --- ReAct tool loop ---
|
||||
for round_num in range(settings.orchestrator_max_rounds):
|
||||
for round_num in range(starting_round, settings.orchestrator_max_rounds):
|
||||
logger.info("Orchestrator round %d for task: %.80s", round_num + 1, task)
|
||||
|
||||
response = await asyncio.to_thread(
|
||||
@@ -113,67 +282,56 @@ async def run(
|
||||
candidate = response.candidates[0]
|
||||
parts = candidate.content.parts if candidate.content else []
|
||||
|
||||
# Check if Gemini wants to call any tools
|
||||
tool_call_parts = [
|
||||
p for p in parts
|
||||
if hasattr(p, "function_call") and p.function_call and p.function_call.name
|
||||
]
|
||||
|
||||
if not tool_call_parts:
|
||||
# No more tool calls — extract Gemini's text summary
|
||||
gemini_summary = "".join(
|
||||
p.text for p in parts if hasattr(p, "text") and p.text
|
||||
).strip()
|
||||
logger.info("Orchestrator done after %d round(s). Tools used: %d",
|
||||
round_num + 1, len(tool_call_log))
|
||||
break
|
||||
return gemini_summary, None
|
||||
|
||||
# Add Gemini's response (with function calls) to the conversation
|
||||
contents.append(candidate.content)
|
||||
|
||||
# Execute tool calls — check confirmation requirement before calling
|
||||
# Snapshot state before function responses — used if a checkpoint is needed
|
||||
pre_fn_state = list(contents)
|
||||
|
||||
response_parts: list[types.Part] = []
|
||||
confirm_requested = False
|
||||
pending_tools: list[dict] = []
|
||||
executed_results: list[dict] = []
|
||||
|
||||
for fc_part in tool_call_parts:
|
||||
fc = fc_part.function_call
|
||||
name = fc.name
|
||||
args = dict(fc.args)
|
||||
|
||||
if name in CONFIRM_REQUIRED:
|
||||
args_str = json.dumps(args, indent=2) if args else "(no arguments)"
|
||||
result_str = (
|
||||
f"⚠️ CONFIRMATION REQUIRED ⚠️\n"
|
||||
f"Tool: {name}\nArguments:\n{args_str}\n\n"
|
||||
f"Do NOT call this tool again. Tell the user exactly what you were "
|
||||
f"about to do, explain the potential impact, and ask them to confirm "
|
||||
f"by sending a follow-up message before you proceed."
|
||||
)
|
||||
confirm_requested = True
|
||||
if name in effective_confirm:
|
||||
pending_tools.append({"name": name, "args": args})
|
||||
logger.info("Tool %s blocked — confirmation required", name)
|
||||
else:
|
||||
result_str = await _execute_tool(name, args, tool_callables)
|
||||
logger.info("Tool %s → %d chars", name, len(result_str))
|
||||
executed_results.append({"name": name, "args": args, "result": result_str})
|
||||
tool_call_log.append({"tool": name, "args": args, "result": result_str})
|
||||
response_parts.append(types.Part(function_response=types.FunctionResponse(
|
||||
name=name, response={"result": result_str}
|
||||
)))
|
||||
|
||||
tool_call_log.append({
|
||||
"tool": name,
|
||||
"args": args,
|
||||
"result": "[awaiting confirmation]" if name in CONFIRM_REQUIRED else result_str,
|
||||
})
|
||||
response_parts.append(
|
||||
types.Part(
|
||||
function_response=types.FunctionResponse(
|
||||
name=name,
|
||||
response={"result": result_str},
|
||||
)
|
||||
)
|
||||
)
|
||||
if pending_tools:
|
||||
# Add placeholder responses and get Gemini to produce the confirmation message
|
||||
for pt in pending_tools:
|
||||
placeholder = f"[AWAITING USER CONFIRMATION for {pt['name']}]"
|
||||
response_parts.append(types.Part(function_response=types.FunctionResponse(
|
||||
name=pt["name"], response={"result": placeholder}
|
||||
)))
|
||||
tool_call_log.append({"tool": pt["name"], "args": pt["args"], "result": "[awaiting confirmation]"})
|
||||
|
||||
contents.append(types.Content(role="user", parts=response_parts))
|
||||
contents.append(types.Content(role="user", parts=response_parts))
|
||||
|
||||
if confirm_requested:
|
||||
# Allow one more Gemini round to produce the confirmation-request message,
|
||||
# then break — tool is not executed until user confirms in a follow-up.
|
||||
conf_response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=model_name or settings.orchestrator_model,
|
||||
@@ -191,10 +349,30 @@ async def run(
|
||||
gemini_summary = "".join(
|
||||
p.text for p in conf_parts if hasattr(p, "text") and p.text
|
||||
).strip() or "This action requires your explicit confirmation before it can proceed."
|
||||
break
|
||||
|
||||
checkpoint = OrchestrateCheckpoint(
|
||||
engine="gemini",
|
||||
pre_fn_state=pre_fn_state,
|
||||
executed_results=executed_results,
|
||||
pending_tools=pending_tools,
|
||||
tool_call_log=list(tool_call_log),
|
||||
task=task,
|
||||
system_prompt=system_prompt,
|
||||
session_messages=session_messages,
|
||||
model_name=model_name,
|
||||
gemini_api_key=gemini_api_key,
|
||||
respond_with_claude=respond_with_claude,
|
||||
response_role=response_role,
|
||||
user_role=user_role,
|
||||
confirm_allow=confirm_allow,
|
||||
confirm_deny=confirm_deny,
|
||||
rounds_used=round_num + 2,
|
||||
)
|
||||
return gemini_summary, checkpoint
|
||||
|
||||
contents.append(types.Content(role="user", parts=response_parts))
|
||||
|
||||
else:
|
||||
# Hit the round limit — use whatever Gemini produced last
|
||||
logger.warning("Orchestrator hit max rounds (%d)", settings.orchestrator_max_rounds)
|
||||
gemini_summary = (
|
||||
f"Reached the tool iteration limit ({settings.orchestrator_max_rounds} rounds). "
|
||||
@@ -202,21 +380,28 @@ async def run(
|
||||
+ "\n\n".join(f"**{t['tool']}**: {t['result'][:500]}" for t in tool_call_log)
|
||||
)
|
||||
|
||||
# --- Claude handoff ---
|
||||
return gemini_summary, None
|
||||
|
||||
|
||||
async def _claude_handoff(
|
||||
task: str,
|
||||
tool_call_log: list[dict],
|
||||
gemini_summary: str,
|
||||
system_prompt: str,
|
||||
session_messages: list[dict] | None,
|
||||
respond_with_claude: bool,
|
||||
response_role: str,
|
||||
) -> OrchestratorResult:
|
||||
if respond_with_claude:
|
||||
claude_prompt = _build_claude_prompt(task, tool_call_log, gemini_summary)
|
||||
|
||||
# Merge with session history so Claude has conversation context
|
||||
messages = list(session_messages or [])
|
||||
messages.append({"role": "user", "content": claude_prompt})
|
||||
|
||||
response_text, backend = await complete(
|
||||
system_prompt=system_prompt,
|
||||
messages=messages,
|
||||
role=response_role,
|
||||
)
|
||||
else:
|
||||
# Cron/background tasks: return Gemini's summary directly, no Claude call
|
||||
response_text = gemini_summary or "No information gathered."
|
||||
backend = "gemini"
|
||||
|
||||
@@ -242,12 +427,11 @@ def _build_task_prompt(task: str, session_messages: list[dict] | None) -> str:
|
||||
if not session_messages:
|
||||
return task
|
||||
|
||||
# Include last few turns for context (don't send the full history to keep tokens low)
|
||||
recent = session_messages[-6:] # last 3 turns
|
||||
recent = session_messages[-6:]
|
||||
history_lines = []
|
||||
for msg in recent:
|
||||
label = "User" if msg["role"] == "user" else "Assistant"
|
||||
history_lines.append(f"{label}: {msg['content'][:300]}") # truncate long messages
|
||||
history_lines.append(f"{label}: {msg['content'][:300]}")
|
||||
|
||||
context = "\n".join(history_lines)
|
||||
return f"<recent_conversation>\n{context}\n</recent_conversation>\n\nCurrent request: {task}"
|
||||
@@ -265,7 +449,6 @@ def _build_claude_prompt(
|
||||
parts.append("## Research gathered\n")
|
||||
for tc in tool_calls:
|
||||
parts.append(f"### {tc['tool']}({_format_args(tc['args'])})")
|
||||
# Truncate very long results — Claude gets the gist
|
||||
result = tc["result"]
|
||||
if len(result) > 2000:
|
||||
result = result[:2000] + "\n… [truncated]"
|
||||
|
||||
Reference in New Issue
Block a user