feat: Intelligence Layer Phase 1 — orchestrator service

Adds the Gemini API orchestrator (ReAct tool loop → Claude responder):

Orchestrator engine + router:
- orchestrator_engine.py: Gemini API tool loop, Claude CLI handoff
- routers/orchestrator.py: POST /orchestrate (async job queue), GET /orchestrate/{job_id}

Tools (cortex/tools/):
- web.py: DuckDuckGo web search (no key required)
- ae_knowledge.py: ae_journal_search + ae_journal_entry_create (AE V3 API)
- ae_tasks.py: ae_task_list (reads agents_sync Kanban filesystem)
- files.py: file_read (path-allowlisted to safe dirs)

Config + deps:
- config.py: orchestrator, DuckDuckGo, and AE API settings
- requirements.txt: google-genai, duckduckgo-search
- .env.default: reference config with all new keys documented

Docs:
- CLAUDE.md, README.md, documentation/ added to repo
- Port references updated 7331 → 8000 throughout
- Default model updated to gemini-2.5-flash

Tested: ae_task_list, ae_journal_search, web_search all working end-to-end.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 19:37:49 -04:00
parent 23f8659aaa
commit ed472ce9a0
15 changed files with 1840 additions and 1 deletions
--- a/cortex/orchestrator_engine.py
+++ b/cortex/orchestrator_engine.py
@@ -0,0 +1,243 @@
+"""
+Orchestrator engine — two-brain architecture.
+
+Flow:
+  1. Gemini API runs a ReAct tool loop (reason → act → observe → repeat)
+  2. When Gemini has gathered enough context, it produces a final summary
+  3. That enriched context is handed off to Claude for the user-facing response
+
+Why this split:
+  - Gemini API has native structured tool calling (Gemini CLI subprocess does not)
+  - Claude produces higher-quality user-facing prose and reasoning
+  - Claude Pro subscription has no API cost; Gemini free tier handles orchestration load
+
+For direct chat (no tools needed), this engine is not invoked — the chat router
+calls llm_client.complete() directly, which is faster and has no orchestration overhead.
+"""
+
+import asyncio
+import logging
+from dataclasses import dataclass, field
+
+from google import genai
+from google.genai import types
+
+from config import settings
+from llm_client import complete
+from tools import TOOL_DECLARATIONS, call_tool
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# System prompt given to Gemini during the tool loop; run() passes it as
+# `system_instruction` on every generate_content call.
+# Gemini's job is information gathering and planning — NOT writing the final response.
+# The summary it produces is embedded in Claude's prompt when run() is called with
+# respond_with_claude=True, and returned directly as the response otherwise.
+_ORCHESTRATOR_SYSTEM = """You are an intelligent orchestrator. Your job is to:
+1. Understand the user's request
+2. Call tools to gather the information needed to answer it
+3. Once you have enough information, produce a concise summary of:
+   - What the user asked
+   - What you found (tool results, key facts)
+   - Any important context that would help generate a good answer
+
+Do NOT write a polished final answer — a human-facing AI will do that next.
+Keep your summary factual and complete. Include relevant URLs, data, and specifics.
+If no tools are needed, return an empty summary."""
+
+
+@dataclass
+class OrchestratorResult:
+    """Outcome of one orchestration run, as returned by run()."""
+
+    # Final response text: the reply from the Claude handoff, or Gemini's summary
+    # when run() is called with respond_with_claude=False.
+    response: str
+    # One entry per executed tool call, in execution order: {tool, args, result}.
+    tool_calls: list[dict] = field(default_factory=list)
+    # Backend that produced `response`: the value reported by complete(), or
+    # "gemini" when the Claude handoff is skipped.
+    backend: str = "claude"
+    # What Gemini handed to Claude (kept for debug/display).
+    gemini_summary: str = ""
+
+
+async def run(
+    task: str,
+    system_prompt: str = "",
+    session_messages: list[dict] | None = None,
+    respond_with_claude: bool = True,
+) -> OrchestratorResult:
+    """
+    Run the full orchestration loop for a task.
+
+    Args:
+        task:               The user's request (plain text)
+        system_prompt:      Inara's system prompt (from context_loader) — passed to Claude
+        session_messages:   Prior conversation history for session continuity
+        respond_with_claude: If False, return Gemini's summary as the response (useful for
+                             background/cron tasks where a polished reply isn't needed)
+
+    Returns:
+        OrchestratorResult with response, tool call log, backend used, and Gemini summary
+    """
+    if not settings.gemini_api_key:
+        raise RuntimeError(
+            "GEMINI_API_KEY not set — orchestrator requires Gemini API. "
+            "Get a free key at https://aistudio.google.com/apikey and add it to .env"
+        )
+
+    client = genai.Client(api_key=settings.gemini_api_key)
+
+    # Seed Gemini with the task — include recent session context if available
+    task_with_context = _build_task_prompt(task, session_messages)
+    contents: list[types.Content] = [
+        types.Content(role="user", parts=[types.Part(text=task_with_context)])
+    ]
+
+    tool_call_log: list[dict] = []
+    gemini_summary = ""
+
+    # --- ReAct tool loop ---
+    for round_num in range(settings.orchestrator_max_rounds):
+        logger.info("Orchestrator round %d for task: %.80s", round_num + 1, task)
+
+        response = await asyncio.to_thread(
+            client.models.generate_content,
+            model=settings.orchestrator_model,
+            contents=contents,
+            config=types.GenerateContentConfig(
+                tools=TOOL_DECLARATIONS,
+                system_instruction=_ORCHESTRATOR_SYSTEM,
+            ),
+        )
+
+        candidate = response.candidates[0]
+        parts = candidate.content.parts if candidate.content else []
+
+        # Check if Gemini wants to call any tools
+        tool_call_parts = [
+            p for p in parts
+            if hasattr(p, "function_call") and p.function_call and p.function_call.name
+        ]
+
+        if not tool_call_parts:
+            # No more tool calls — extract Gemini's text summary
+            gemini_summary = "".join(
+                p.text for p in parts if hasattr(p, "text") and p.text
+            ).strip()
+            logger.info("Orchestrator done after %d round(s). Tools used: %d",
+                        round_num + 1, len(tool_call_log))
+            break
+
+        # Add Gemini's response (with function calls) to the conversation
+        contents.append(candidate.content)
+
+        # Execute all tool calls in parallel
+        tool_tasks = [
+            _execute_tool(fc.function_call.name, dict(fc.function_call.args))
+            for fc in tool_call_parts
+        ]
+        tool_results = await asyncio.gather(*tool_tasks, return_exceptions=True)
+
+        # Build function response parts and update log
+        response_parts: list[types.Part] = []
+        for fc_part, result in zip(tool_call_parts, tool_results):
+            fc = fc_part.function_call
+            result_str = str(result) if not isinstance(result, Exception) else f"Error: {result}"
+            logger.info("Tool %s → %d chars", fc.name, len(result_str))
+
+            tool_call_log.append({
+                "tool": fc.name,
+                "args": dict(fc.args),
+                "result": result_str,
+            })
+            response_parts.append(
+                types.Part(
+                    function_response=types.FunctionResponse(
+                        name=fc.name,
+                        response={"result": result_str},
+                    )
+                )
+            )
+
+        contents.append(types.Content(role="user", parts=response_parts))
+
+    else:
+        # Hit the round limit — use whatever Gemini produced last
+        logger.warning("Orchestrator hit max rounds (%d)", settings.orchestrator_max_rounds)
+        gemini_summary = (
+            f"Reached the tool iteration limit ({settings.orchestrator_max_rounds} rounds). "
+            "Here is what was gathered so far:\n\n"
+            + "\n\n".join(f"**{t['tool']}**: {t['result'][:500]}" for t in tool_call_log)
+        )
+
+    # --- Claude handoff ---
+    if respond_with_claude:
+        claude_prompt = _build_claude_prompt(task, tool_call_log, gemini_summary)
+
+        # Merge with session history so Claude has conversation context
+        messages = list(session_messages or [])
+        messages.append({"role": "user", "content": claude_prompt})
+
+        response_text, backend = await complete(
+            system_prompt=system_prompt,
+            messages=messages,
+            model="claude",
+        )
+    else:
+        # Cron/background tasks: return Gemini's summary directly, no Claude call
+        response_text = gemini_summary or "No information gathered."
+        backend = "gemini"
+
+    return OrchestratorResult(
+        response=response_text,
+        tool_calls=tool_call_log,
+        backend=backend,
+        gemini_summary=gemini_summary,
+    )
+
+
+async def _execute_tool(name: str, args: dict) -> str:
+    """Execute a single tool call, catching all exceptions."""
+    try:
+        return await call_tool(name, args)
+    except Exception as e:
+        logger.warning("Tool %s failed: %s", name, e)
+        return f"Tool error: {e}"
+
+
+def _build_task_prompt(task: str, session_messages: list[dict] | None) -> str:
+    """Prepend recent session context so Gemini understands the conversation."""
+    if not session_messages:
+        return task
+
+    # Include last few turns for context (don't send the full history to keep tokens low)
+    recent = session_messages[-6:]  # last 3 turns
+    history_lines = []
+    for msg in recent:
+        label = "User" if msg["role"] == "user" else "Assistant"
+        history_lines.append(f"{label}: {msg['content'][:300]}")  # truncate long messages
+
+    context = "\n".join(history_lines)
+    return f"<recent_conversation>\n{context}\n</recent_conversation>\n\nCurrent request: {task}"
+
+
+def _build_claude_prompt(
+    task: str,
+    tool_calls: list[dict],
+    gemini_summary: str,
+) -> str:
+    """Build the enriched context handed from Gemini to Claude."""
+    parts = [f"User request: {task}\n"]
+
+    if tool_calls:
+        parts.append("## Research gathered\n")
+        for tc in tool_calls:
+            parts.append(f"### {tc['tool']}({_format_args(tc['args'])})")
+            # Truncate very long results — Claude gets the gist
+            result = tc["result"]
+            if len(result) > 2000:
+                result = result[:2000] + "\n… [truncated]"
+            parts.append(result)
+            parts.append("")
+
+    if gemini_summary:
+        parts.append("## Summary of findings\n")
+        parts.append(gemini_summary)
+
+    return "\n".join(parts)
+
+
+def _format_args(args: dict) -> str:
+    """Format tool args as a compact string for display."""
+    return ", ".join(f"{k}={repr(v)}" for k, v in args.items())