Cortex-Inara/cortex/orchestrator_engine.py

"""
Orchestrator engine — two-brain architecture.

Flow:
  1. Gemini API runs a ReAct tool loop (reason → act → observe → repeat)
  2. When Gemini has gathered enough context, it produces a final summary
  3. That enriched context is handed off to Claude for the user-facing response

Why this split:
  - Gemini API has native structured tool calling (Gemini CLI subprocess does not)
  - Claude produces higher-quality user-facing prose and reasoning
  - Claude Pro subscription has no API cost; Gemini free tier handles orchestration load

For direct chat (no tools needed), this engine is not invoked — the chat router
calls llm_client.complete() directly, which is faster and has no orchestration overhead.
"""

import asyncio
import json
import logging
from dataclasses import dataclass, field

from google import genai
from google.genai import types

from config import settings
from llm_client import complete
from tools import TOOL_DECLARATIONS, call_tool, get_tools_for_role, CONFIRM_REQUIRED
import usage_tracker
import tool_audit
from persona import _user

logger = logging.getLogger(__name__)

# System prompt given to Gemini during the tool loop; it is sent as the
# `system_instruction` of every generate_content call the loop makes.
# Gemini's job is information gathering and planning — NOT writing the final response.
# The caller-supplied `system_prompt` is separate: it is passed to complete() for the
# final user-facing response, not to Gemini.
_ORCHESTRATOR_SYSTEM = """You are an intelligent orchestrator. Your job is to:
1. Understand the user's request
2. Call tools to gather the information needed to answer it
3. Once you have enough information, produce a concise summary of:
   - What the user asked
   - What you found (tool results, key facts)
   - Any important context that would help generate a good answer

Do NOT write a polished final answer — a human-facing AI will do that next.
Keep your summary factual and complete. Include relevant URLs, data, and specifics.
If no tools are needed, return an empty summary."""


def _track_gemini_usage(response, model_name: str | None) -> None:
    meta = getattr(response, "usage_metadata", None)
    if not meta:
        return
    prompt_tokens = getattr(meta, "prompt_token_count", 0) or 0
    completion_tokens = getattr(meta, "candidates_token_count", 0) or 0
    if prompt_tokens or completion_tokens:
        try:
            asyncio.create_task(usage_tracker.record(
                username=_user.get(),
                backend="gemini_api",
                model_name=model_name or settings.orchestrator_model,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            ))
        except Exception:
            pass


@dataclass
class OrchestrateCheckpoint:
    """Saved execution state for a job paused at a confirmation gate."""
    engine: str                          # "gemini" | "openai"
    pre_fn_state: list                   # conversation state before function responses
    executed_results: list[dict]         # tools that already ran this round
    pending_tools: list[dict]            # [{name, args}] awaiting confirmation
    tool_call_log: list[dict]            # all tool calls so far
    task: str
    # Gemini-specific config (unused by openai engine)
    system_prompt: str = ""
    session_messages: list | None = None
    model_name: str | None = None
    gemini_api_key: str | None = None
    respond_with_claude: bool = True
    response_role: str = "chat"
    # OpenAI-specific config (unused by gemini engine)
    model_cfg: dict | None = None
    respond_with_final: bool = True
    # Common
    user_role: str = "user"
    tool_list: list[str] | None = None
    confirm_allow: frozenset = field(default_factory=frozenset)
    confirm_deny: frozenset = field(default_factory=frozenset)
    rounds_used: int = 0
    max_rounds: int | None = None


@dataclass
class OrchestratorResult:
    # Outcome of run()/resume(): either a finished response or a pause at a
    # confirmation gate (checkpoint set).
    response: str                       # final user-facing response; when checkpoint is set this is
                                        # Gemini's confirmation message instead
    tool_calls: list[dict] = field(default_factory=list)  # [{tool, args, result}]
    backend: str = "claude"             # model that produced the final response ("gemini" when the
                                        # Gemini summary/confirmation text is returned directly)
    backend_label: str = ""             # human-readable model label for display
                                        # NOTE(review): never populated in this module — confirm caller sets it
    gemini_summary: str = ""            # what Gemini handed to Claude (debug/display)
    checkpoint: OrchestrateCheckpoint | None = None  # set when awaiting confirmation


async def run(
    task: str,
    system_prompt: str = "",
    session_messages: list[dict] | None = None,
    respond_with_claude: bool = True,
    gemini_api_key: str | None = None,
    model_name: str | None = None,
    response_role: str = "chat",
    user_role: str = "user",
    tool_list: list[str] | None = None,
    confirm_allow: set[str] | None = None,
    confirm_deny: set[str] | None = None,
    max_rounds: int | None = None,
) -> OrchestratorResult:
    """
    Start a new orchestration: Gemini tool loop first, final response second.

    Args:
        task:                The user's request (plain text).
        system_prompt:       System prompt forwarded to complete() for the final response.
        session_messages:    Prior conversation history; the most recent turns are
                             prepended to the Gemini prompt and the full list is
                             passed to the final-response model.
        respond_with_claude: If False, Gemini's summary is returned as the response
                             and complete() is not called.
        gemini_api_key:      Per-user Gemini API key; falls back to settings.gemini_api_key.
        model_name:          Gemini model for the tool loop; falls back to
                             settings.orchestrator_model.
        response_role:       Role passed to complete() for the final response.
        user_role:           Role handed to get_tools_for_role to select available tools.
        tool_list:           Optional explicit tool list, also handed to get_tools_for_role.
        confirm_allow:       Tool names removed from CONFIRM_REQUIRED for this run.
        confirm_deny:        Tool names added to the confirmation-required set for this run.
        max_rounds:          Optional cap on tool-loop rounds, forwarded to the loop.

    Returns:
        OrchestratorResult — if its checkpoint is set, the job is paused awaiting
        user confirmation and should be continued with resume().

    Raises:
        RuntimeError: when neither gemini_api_key nor settings.gemini_api_key is set.
    """
    resolved_key = gemini_api_key or settings.gemini_api_key
    if not resolved_key:
        raise RuntimeError(
            "No Gemini API key available — set GEMINI_API_KEY in .env or add a personal key "
            "via: manage_passwords.py gemini-key <username> <key>"
        )

    gemini = genai.Client(api_key=resolved_key)
    tool_audit.set_context("gemini", model_name or settings.orchestrator_model)

    # Per-user overrides of the confirmation gate.
    allow_overrides = frozenset(confirm_allow or ())
    deny_overrides = frozenset(confirm_deny or ())
    gated_tools = (CONFIRM_REQUIRED - set(allow_overrides)) | set(deny_overrides)

    # The conversation starts with a single user turn carrying task + recent history.
    opening_text = _build_task_prompt(task, session_messages)
    conversation: list[types.Content] = [
        types.Content(role="user", parts=[types.Part(text=opening_text)])
    ]
    declarations, callables = get_tools_for_role(user_role, tool_list)
    call_log: list[dict] = []

    summary, paused = await _run_from_contents(
        client=gemini,
        contents=conversation,
        tool_declarations=declarations,
        tool_callables=callables,
        tool_call_log=call_log,
        effective_confirm=gated_tools,
        model_name=model_name,
        task=task,
        system_prompt=system_prompt,
        session_messages=session_messages,
        respond_with_claude=respond_with_claude,
        response_role=response_role,
        user_role=user_role,
        tool_list=tool_list,
        confirm_allow=allow_overrides,
        confirm_deny=deny_overrides,
        starting_round=0,
        gemini_api_key=resolved_key,
        max_rounds=max_rounds,
    )

    if not paused:
        # Loop finished normally — produce the final response.
        return await _claude_handoff(
            task=task,
            tool_call_log=call_log,
            gemini_summary=summary,
            system_prompt=system_prompt,
            session_messages=session_messages,
            respond_with_claude=respond_with_claude,
            response_role=response_role,
        )

    # Paused at a confirmation gate: surface Gemini's message plus the checkpoint.
    return OrchestratorResult(
        response=summary,
        tool_calls=list(call_log),
        backend="gemini",
        gemini_summary=summary,
        checkpoint=paused,
    )


async def resume(checkpoint: OrchestrateCheckpoint, confirmed: bool) -> OrchestratorResult:
    """Continue a job that was paused at a confirmation gate.

    Rebuilds the conversation from the checkpoint, answers every function call
    of the paused round (already-executed tools with their saved results,
    pending tools with either their real result or a denial), then re-enters
    the tool loop.

    Args:
        checkpoint: State saved when the loop paused.
        confirmed:  True to execute the pending tools; False to report
                    "Action denied by user." to Gemini for each of them.

    Returns:
        OrchestratorResult — its checkpoint is set again if the continued loop
        hits another confirmation gate.
    """
    api_key = checkpoint.gemini_api_key or settings.gemini_api_key
    client = genai.Client(api_key=api_key)
    # Same audit context that run() establishes, so tools executed after
    # confirmation are attributed to the same engine/model.
    tool_audit.set_context("gemini", checkpoint.model_name or settings.orchestrator_model)
    tool_declarations, tool_callables = get_tools_for_role(checkpoint.user_role, checkpoint.tool_list)

    effective_confirm = (CONFIRM_REQUIRED - set(checkpoint.confirm_allow)) | set(checkpoint.confirm_deny)

    # Rebuild from saved state — strip "[awaiting confirmation]" placeholders
    contents = list(checkpoint.pre_fn_state)
    tool_call_log = [t for t in checkpoint.tool_call_log if t["result"] != "[awaiting confirmation]"]

    # Build function responses for this round
    response_parts: list[types.Part] = []

    # Tools that ran before the pause: replay their saved results (not re-executed).
    for er in checkpoint.executed_results:
        response_parts.append(types.Part(function_response=types.FunctionResponse(
            name=er["name"], response={"result": er["result"]}
        )))

    # Tools that were held back: run them now, or tell Gemini they were denied.
    for pt in checkpoint.pending_tools:
        if confirmed:
            result_str = await _execute_tool(pt["name"], pt["args"], tool_callables)
            logger.info("Confirmed tool %s → %d chars", pt["name"], len(result_str))
        else:
            result_str = "Action denied by user."
            logger.info("Tool %s denied by user", pt["name"])
        tool_call_log.append({"tool": pt["name"], "args": pt["args"], "result": result_str})
        response_parts.append(types.Part(function_response=types.FunctionResponse(
            name=pt["name"], response={"result": result_str}
        )))

    contents.append(types.Content(role="user", parts=response_parts))

    gemini_summary, new_checkpoint = await _run_from_contents(
        client=client,
        contents=contents,
        tool_declarations=tool_declarations,
        tool_callables=tool_callables,
        tool_call_log=tool_call_log,
        effective_confirm=effective_confirm,
        model_name=checkpoint.model_name,
        task=checkpoint.task,
        system_prompt=checkpoint.system_prompt,
        session_messages=checkpoint.session_messages,
        respond_with_claude=checkpoint.respond_with_claude,
        response_role=checkpoint.response_role,
        user_role=checkpoint.user_role,
        tool_list=checkpoint.tool_list,
        confirm_allow=checkpoint.confirm_allow,
        confirm_deny=checkpoint.confirm_deny,
        starting_round=checkpoint.rounds_used,
        gemini_api_key=api_key,
        max_rounds=checkpoint.max_rounds,
    )

    if new_checkpoint:
        # Another gated tool was requested — pause again.
        return OrchestratorResult(
            response=gemini_summary,
            tool_calls=list(tool_call_log),
            backend="gemini",
            gemini_summary=gemini_summary,
            checkpoint=new_checkpoint,
        )

    return await _claude_handoff(
        task=checkpoint.task,
        tool_call_log=tool_call_log,
        gemini_summary=gemini_summary,
        system_prompt=checkpoint.system_prompt,
        session_messages=checkpoint.session_messages,
        respond_with_claude=checkpoint.respond_with_claude,
        response_role=checkpoint.response_role,
    )


def _content_and_parts(response) -> tuple:
    """Return (content, parts) of the first candidate, or (None, []) when absent."""
    candidates = getattr(response, "candidates", None)
    if not candidates:
        return None, []
    content = candidates[0].content
    if not content:
        return None, []
    # `parts` may be None on an otherwise valid content object.
    return content, list(content.parts or [])


def _joined_text(parts) -> str:
    """Concatenate the text of every text-bearing part and strip the result."""
    return "".join(p.text for p in parts if hasattr(p, "text") and p.text).strip()


async def _run_from_contents(
    client,
    contents: list,
    tool_declarations: list,
    tool_callables: dict,
    tool_call_log: list[dict],
    effective_confirm: set[str],
    model_name: str | None,
    task: str,
    system_prompt: str,
    session_messages: list[dict] | None,
    respond_with_claude: bool,
    response_role: str,
    user_role: str,
    confirm_allow: frozenset,
    confirm_deny: frozenset,
    starting_round: int = 0,
    gemini_api_key: str | None = None,
    tool_list: list[str] | None = None,
    max_rounds: int | None = None,
) -> tuple[str, OrchestrateCheckpoint | None]:
    """
    Run the ReAct loop from the current contents state.

    Each round asks Gemini for the next step. If the reply contains no function
    calls, its text is the summary and the loop ends. Otherwise every requested
    tool is either executed immediately or, when its name is in
    ``effective_confirm``, held back; any held-back tool pauses the job.

    ``contents`` and ``tool_call_log`` are mutated in place. The remaining
    parameters (task, system_prompt, session_messages, ...) are not used by the
    loop itself beyond logging; they are stored in the checkpoint so that
    resume() can continue with the same configuration.

    Args:
        starting_round: Round index to start from (0 for a fresh run).
        max_rounds:     Optional round cap; the effective limit never exceeds
                        settings.orchestrator_max_rounds.

    Returns:
        (gemini_summary, checkpoint) — checkpoint is set if confirmation is needed.
        A response without any candidate content yields an empty summary.
    """
    gemini_summary = ""
    per_model_limit = max_rounds or settings.orchestrator_max_rounds
    effective_limit = min(per_model_limit, settings.orchestrator_max_rounds)

    async def _generate():
        """One Gemini call over the current `contents`, with usage tracking."""
        # generate_content is blocking; run it off the event loop.
        response = await asyncio.to_thread(
            client.models.generate_content,
            model=model_name or settings.orchestrator_model,
            contents=contents,
            config=types.GenerateContentConfig(
                tools=tool_declarations,
                system_instruction=_ORCHESTRATOR_SYSTEM,
            ),
        )
        _track_gemini_usage(response, model_name)
        return response

    for round_num in range(starting_round, effective_limit):
        logger.info("Orchestrator round %d for task: %.80s", round_num + 1, task)

        response = await _generate()

        # A response can come back without candidates or content; treat that
        # as "no tool calls, no text" instead of crashing on candidates[0].
        content, parts = _content_and_parts(response)
        if content is None:
            logger.warning("Gemini returned no candidate content in round %d", round_num + 1)

        tool_call_parts = [
            p for p in parts
            if hasattr(p, "function_call") and p.function_call and p.function_call.name
        ]

        if not tool_call_parts:
            gemini_summary = _joined_text(parts)
            logger.info("Orchestrator done after %d round(s). Tools used: %d",
                        round_num + 1, len(tool_call_log))
            return gemini_summary, None

        contents.append(content)

        # Snapshot state before function responses — used if a checkpoint is needed
        pre_fn_state = list(contents)

        response_parts: list[types.Part] = []
        pending_tools: list[dict] = []
        executed_results: list[dict] = []

        for fc_part in tool_call_parts:
            fc = fc_part.function_call
            name = fc.name
            # A call to a tool without parameters may carry args=None.
            args = dict(fc.args or {})

            if name in effective_confirm:
                pending_tools.append({"name": name, "args": args})
                logger.info("Tool %s blocked — confirmation required", name)
            else:
                result_str = await _execute_tool(name, args, tool_callables)
                logger.info("Tool %s → %d chars", name, len(result_str))
                executed_results.append({"name": name, "args": args, "result": result_str})
                tool_call_log.append({"tool": name, "args": args, "result": result_str})
                response_parts.append(types.Part(function_response=types.FunctionResponse(
                    name=name, response={"result": result_str}
                )))

        if pending_tools:
            # Add placeholder responses and get Gemini to produce the confirmation message
            for pt in pending_tools:
                placeholder = f"[AWAITING USER CONFIRMATION for {pt['name']}]"
                response_parts.append(types.Part(function_response=types.FunctionResponse(
                    name=pt["name"], response={"result": placeholder}
                )))
                tool_call_log.append({"tool": pt["name"], "args": pt["args"], "result": "[awaiting confirmation]"})

            contents.append(types.Content(role="user", parts=response_parts))

            conf_response = await _generate()
            _, conf_parts = _content_and_parts(conf_response)
            gemini_summary = (
                _joined_text(conf_parts)
                or "This action requires your explicit confirmation before it can proceed."
            )

            checkpoint = OrchestrateCheckpoint(
                engine="gemini",
                pre_fn_state=pre_fn_state,
                executed_results=executed_results,
                pending_tools=pending_tools,
                tool_call_log=list(tool_call_log),
                task=task,
                system_prompt=system_prompt,
                session_messages=session_messages,
                model_name=model_name,
                gemini_api_key=gemini_api_key,
                respond_with_claude=respond_with_claude,
                response_role=response_role,
                user_role=user_role,
                tool_list=tool_list,
                confirm_allow=confirm_allow,
                confirm_deny=confirm_deny,
                # This round plus the confirmation-message call above.
                rounds_used=round_num + 2,
                max_rounds=max_rounds,
            )
            return gemini_summary, checkpoint

        contents.append(types.Content(role="user", parts=response_parts))

    # The loop body only leaves via `return`, so reaching this point means the
    # round budget is exhausted (or was already exhausted on entry).
    logger.warning("Orchestrator hit max rounds (%d)", effective_limit)
    gemini_summary = (
        f"Reached the tool iteration limit ({effective_limit} rounds). "
        "Here is what was gathered so far:\n\n"
        + "\n\n".join(f"**{t['tool']}**: {t['result'][:500]}" for t in tool_call_log)
    )

    return gemini_summary, None


async def _claude_handoff(
    task: str,
    tool_call_log: list[dict],
    gemini_summary: str,
    system_prompt: str,
    session_messages: list[dict] | None,
    respond_with_claude: bool,
    response_role: str,
) -> OrchestratorResult:
    """Turn the finished tool loop into the final OrchestratorResult.

    With respond_with_claude set, the gathered research is appended to the
    session history as one user message and complete() writes the response;
    otherwise Gemini's summary (or a fixed fallback text) is returned as-is.
    """
    if not respond_with_claude:
        # No polished reply wanted — hand back what Gemini produced.
        return OrchestratorResult(
            response=gemini_summary or "No information gathered.",
            tool_calls=tool_call_log,
            backend="gemini",
            gemini_summary=gemini_summary,
        )

    enriched_request = _build_claude_prompt(task, tool_call_log, gemini_summary)
    conversation = [
        *(session_messages or []),
        {"role": "user", "content": enriched_request},
    ]
    final_text, used_backend = await complete(
        system_prompt=system_prompt,
        messages=conversation,
        role=response_role,
    )
    return OrchestratorResult(
        response=final_text,
        tool_calls=tool_call_log,
        backend=used_backend,
        gemini_summary=gemini_summary,
    )


async def _execute_tool(name: str, args: dict, callables: dict | None = None) -> str:
    """Run one tool through call_tool; any failure becomes a "Tool error: …" string."""
    try:
        outcome = await call_tool(name, args, callables)
    except Exception as exc:
        # Reported back to the model as the tool's result rather than raised.
        logger.warning("Tool %s failed: %s", name, exc)
        return f"Tool error: {exc}"
    return outcome


def _build_task_prompt(task: str, session_messages: list[dict] | None) -> str:
    """Prepend recent session context so Gemini understands the conversation."""
    if not session_messages:
        return task

    recent = session_messages[-6:]
    history_lines = []
    for msg in recent:
        label = "User" if msg["role"] == "user" else "Assistant"
        history_lines.append(f"{label}: {msg['content'][:300]}")

    context = "\n".join(history_lines)
    return f"<recent_conversation>\n{context}\n</recent_conversation>\n\nCurrent request: {task}"


def _build_claude_prompt(
    task: str,
    tool_calls: list[dict],
    gemini_summary: str,
) -> str:
    """Build the enriched context handed from Gemini to Claude."""
    parts = [f"User request: {task}\n"]

    if tool_calls:
        parts.append("## Research gathered\n")
        for tc in tool_calls:
            parts.append(f"### {tc['tool']}({_format_args(tc['args'])})")
            result = tc["result"]
            if len(result) > 2000:
                result = result[:2000] + "\n… [truncated]"
            parts.append(result)
            parts.append("")

    if gemini_summary:
        parts.append("## Summary of findings\n")
        parts.append(gemini_summary)

    return "\n".join(parts)


def _format_args(args: dict) -> str:
    """Format tool args as a compact string for display."""
    return ", ".join(f"{k}={repr(v)}" for k, v in args.items())