feat: Intelligence Layer Phase 1 — orchestrator service
Adds the Gemini API orchestrator (ReAct tool loop → Claude responder):
Orchestrator engine + router:
- orchestrator_engine.py: Gemini API tool loop, Claude CLI handoff
- routers/orchestrator.py: POST /orchestrate (async job queue), GET /orchestrate/{job_id}
Tools (cortex/tools/):
- web.py: DuckDuckGo web search (no key required)
- ae_knowledge.py: ae_journal_search + ae_journal_entry_create (AE V3 API)
- ae_tasks.py: ae_task_list (reads agents_sync Kanban filesystem)
- files.py: file_read (path-allowlisted to safe dirs)
Config + deps:
- config.py: orchestrator, DuckDuckGo, and AE API settings
- requirements.txt: google-genai, duckduckgo-search
- .env.default: reference config with all new keys documented
Docs:
- CLAUDE.md, README.md, documentation/ added to repo
- Port references updated 7331 → 8000 throughout
- Default model updated to gemini-2.5-flash
Tested: ae_task_list, ae_journal_search, web_search all working end-to-end.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
243
cortex/orchestrator_engine.py
Normal file
243
cortex/orchestrator_engine.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
Orchestrator engine — two-brain architecture.
|
||||
|
||||
Flow:
|
||||
1. Gemini API runs a ReAct tool loop (reason → act → observe → repeat)
|
||||
2. When Gemini has gathered enough context, it produces a final summary
|
||||
3. That enriched context is handed off to Claude for the user-facing response
|
||||
|
||||
Why this split:
|
||||
- Gemini API has native structured tool calling (Gemini CLI subprocess does not)
|
||||
- Claude produces higher-quality user-facing prose and reasoning
|
||||
- Claude Pro subscription has no API cost; Gemini free tier handles orchestration load
|
||||
|
||||
For direct chat (no tools needed), this engine is not invoked — the chat router
|
||||
calls llm_client.complete() directly, which is faster and has no orchestration overhead.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
from config import settings
|
||||
from llm_client import complete
|
||||
from tools import TOOL_DECLARATIONS, call_tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# System prompt given to Gemini during the tool loop.
|
||||
# Gemini's job is information gathering and planning — NOT writing the final response.
|
||||
_ORCHESTRATOR_SYSTEM = """You are an intelligent orchestrator. Your job is to:
|
||||
1. Understand the user's request
|
||||
2. Call tools to gather the information needed to answer it
|
||||
3. Once you have enough information, produce a concise summary of:
|
||||
- What the user asked
|
||||
- What you found (tool results, key facts)
|
||||
- Any important context that would help generate a good answer
|
||||
|
||||
Do NOT write a polished final answer — a human-facing AI will do that next.
|
||||
Keep your summary factual and complete. Include relevant URLs, data, and specifics.
|
||||
If no tools are needed, return an empty summary."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class OrchestratorResult:
|
||||
response: str # final user-facing response (from Claude)
|
||||
tool_calls: list[dict] = field(default_factory=list) # [{tool, args, result}]
|
||||
backend: str = "claude" # model that produced the final response
|
||||
gemini_summary: str = "" # what Gemini handed to Claude (debug/display)
|
||||
|
||||
|
||||
async def run(
|
||||
task: str,
|
||||
system_prompt: str = "",
|
||||
session_messages: list[dict] | None = None,
|
||||
respond_with_claude: bool = True,
|
||||
) -> OrchestratorResult:
|
||||
"""
|
||||
Run the full orchestration loop for a task.
|
||||
|
||||
Args:
|
||||
task: The user's request (plain text)
|
||||
system_prompt: Inara's system prompt (from context_loader) — passed to Claude
|
||||
session_messages: Prior conversation history for session continuity
|
||||
respond_with_claude: If False, return Gemini's summary as the response (useful for
|
||||
background/cron tasks where a polished reply isn't needed)
|
||||
|
||||
Returns:
|
||||
OrchestratorResult with response, tool call log, backend used, and Gemini summary
|
||||
"""
|
||||
if not settings.gemini_api_key:
|
||||
raise RuntimeError(
|
||||
"GEMINI_API_KEY not set — orchestrator requires Gemini API. "
|
||||
"Get a free key at https://aistudio.google.com/apikey and add it to .env"
|
||||
)
|
||||
|
||||
client = genai.Client(api_key=settings.gemini_api_key)
|
||||
|
||||
# Seed Gemini with the task — include recent session context if available
|
||||
task_with_context = _build_task_prompt(task, session_messages)
|
||||
contents: list[types.Content] = [
|
||||
types.Content(role="user", parts=[types.Part(text=task_with_context)])
|
||||
]
|
||||
|
||||
tool_call_log: list[dict] = []
|
||||
gemini_summary = ""
|
||||
|
||||
# --- ReAct tool loop ---
|
||||
for round_num in range(settings.orchestrator_max_rounds):
|
||||
logger.info("Orchestrator round %d for task: %.80s", round_num + 1, task)
|
||||
|
||||
response = await asyncio.to_thread(
|
||||
client.models.generate_content,
|
||||
model=settings.orchestrator_model,
|
||||
contents=contents,
|
||||
config=types.GenerateContentConfig(
|
||||
tools=TOOL_DECLARATIONS,
|
||||
system_instruction=_ORCHESTRATOR_SYSTEM,
|
||||
),
|
||||
)
|
||||
|
||||
candidate = response.candidates[0]
|
||||
parts = candidate.content.parts if candidate.content else []
|
||||
|
||||
# Check if Gemini wants to call any tools
|
||||
tool_call_parts = [
|
||||
p for p in parts
|
||||
if hasattr(p, "function_call") and p.function_call and p.function_call.name
|
||||
]
|
||||
|
||||
if not tool_call_parts:
|
||||
# No more tool calls — extract Gemini's text summary
|
||||
gemini_summary = "".join(
|
||||
p.text for p in parts if hasattr(p, "text") and p.text
|
||||
).strip()
|
||||
logger.info("Orchestrator done after %d round(s). Tools used: %d",
|
||||
round_num + 1, len(tool_call_log))
|
||||
break
|
||||
|
||||
# Add Gemini's response (with function calls) to the conversation
|
||||
contents.append(candidate.content)
|
||||
|
||||
# Execute all tool calls in parallel
|
||||
tool_tasks = [
|
||||
_execute_tool(fc.function_call.name, dict(fc.function_call.args))
|
||||
for fc in tool_call_parts
|
||||
]
|
||||
tool_results = await asyncio.gather(*tool_tasks, return_exceptions=True)
|
||||
|
||||
# Build function response parts and update log
|
||||
response_parts: list[types.Part] = []
|
||||
for fc_part, result in zip(tool_call_parts, tool_results):
|
||||
fc = fc_part.function_call
|
||||
result_str = str(result) if not isinstance(result, Exception) else f"Error: {result}"
|
||||
logger.info("Tool %s → %d chars", fc.name, len(result_str))
|
||||
|
||||
tool_call_log.append({
|
||||
"tool": fc.name,
|
||||
"args": dict(fc.args),
|
||||
"result": result_str,
|
||||
})
|
||||
response_parts.append(
|
||||
types.Part(
|
||||
function_response=types.FunctionResponse(
|
||||
name=fc.name,
|
||||
response={"result": result_str},
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
contents.append(types.Content(role="user", parts=response_parts))
|
||||
|
||||
else:
|
||||
# Hit the round limit — use whatever Gemini produced last
|
||||
logger.warning("Orchestrator hit max rounds (%d)", settings.orchestrator_max_rounds)
|
||||
gemini_summary = (
|
||||
f"Reached the tool iteration limit ({settings.orchestrator_max_rounds} rounds). "
|
||||
"Here is what was gathered so far:\n\n"
|
||||
+ "\n\n".join(f"**{t['tool']}**: {t['result'][:500]}" for t in tool_call_log)
|
||||
)
|
||||
|
||||
# --- Claude handoff ---
|
||||
if respond_with_claude:
|
||||
claude_prompt = _build_claude_prompt(task, tool_call_log, gemini_summary)
|
||||
|
||||
# Merge with session history so Claude has conversation context
|
||||
messages = list(session_messages or [])
|
||||
messages.append({"role": "user", "content": claude_prompt})
|
||||
|
||||
response_text, backend = await complete(
|
||||
system_prompt=system_prompt,
|
||||
messages=messages,
|
||||
model="claude",
|
||||
)
|
||||
else:
|
||||
# Cron/background tasks: return Gemini's summary directly, no Claude call
|
||||
response_text = gemini_summary or "No information gathered."
|
||||
backend = "gemini"
|
||||
|
||||
return OrchestratorResult(
|
||||
response=response_text,
|
||||
tool_calls=tool_call_log,
|
||||
backend=backend,
|
||||
gemini_summary=gemini_summary,
|
||||
)
|
||||
|
||||
|
||||
async def _execute_tool(name: str, args: dict) -> str:
|
||||
"""Execute a single tool call, catching all exceptions."""
|
||||
try:
|
||||
return await call_tool(name, args)
|
||||
except Exception as e:
|
||||
logger.warning("Tool %s failed: %s", name, e)
|
||||
return f"Tool error: {e}"
|
||||
|
||||
|
||||
def _build_task_prompt(task: str, session_messages: list[dict] | None) -> str:
|
||||
"""Prepend recent session context so Gemini understands the conversation."""
|
||||
if not session_messages:
|
||||
return task
|
||||
|
||||
# Include last few turns for context (don't send the full history to keep tokens low)
|
||||
recent = session_messages[-6:] # last 3 turns
|
||||
history_lines = []
|
||||
for msg in recent:
|
||||
label = "User" if msg["role"] == "user" else "Assistant"
|
||||
history_lines.append(f"{label}: {msg['content'][:300]}") # truncate long messages
|
||||
|
||||
context = "\n".join(history_lines)
|
||||
return f"<recent_conversation>\n{context}\n</recent_conversation>\n\nCurrent request: {task}"
|
||||
|
||||
|
||||
def _build_claude_prompt(
|
||||
task: str,
|
||||
tool_calls: list[dict],
|
||||
gemini_summary: str,
|
||||
) -> str:
|
||||
"""Build the enriched context handed from Gemini to Claude."""
|
||||
parts = [f"User request: {task}\n"]
|
||||
|
||||
if tool_calls:
|
||||
parts.append("## Research gathered\n")
|
||||
for tc in tool_calls:
|
||||
parts.append(f"### {tc['tool']}({_format_args(tc['args'])})")
|
||||
# Truncate very long results — Claude gets the gist
|
||||
result = tc["result"]
|
||||
if len(result) > 2000:
|
||||
result = result[:2000] + "\n… [truncated]"
|
||||
parts.append(result)
|
||||
parts.append("")
|
||||
|
||||
if gemini_summary:
|
||||
parts.append("## Summary of findings\n")
|
||||
parts.append(gemini_summary)
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _format_args(args: dict) -> str:
|
||||
"""Format tool args as a compact string for display."""
|
||||
return ", ".join(f"{k}={repr(v)}" for k, v in args.items())
|
||||
Reference in New Issue
Block a user