feat: per-model max_rounds for Gemini orchestrator engine

Mirrors the pattern already in openai_orchestrator.py. The Gemini engine was still hardcoded to the global orchestrator_max_rounds setting. - orchestrator_engine.py: max_rounds param on run() and _run_from_contents(); effective_limit = min(per_model_limit, global_limit); stored in checkpoint so resume() respects it across confirmation gates - routers/orchestrator.py: passes orch_model.get("max_rounds") to run() - tools/agents.py: passes model_cfg.get("max_rounds") for gemini_api spawns Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-08 22:54:37 -04:00
parent 09d775b47b
commit 47d23a7b2f
3 changed files with 13 additions and 3 deletions
--- a/cortex/orchestrator_engine.py
+++ b/cortex/orchestrator_engine.py
@@ -91,6 +91,7 @@ class OrchestrateCheckpoint:
    confirm_allow: frozenset = field(default_factory=frozenset)
    confirm_deny: frozenset = field(default_factory=frozenset)
    rounds_used: int = 0
+    max_rounds: int | None = None


@dataclass
@@ -114,6 +115,7 @@ async def run(
    tool_list: list[str] | None = None,
    confirm_allow: set[str] | None = None,
    confirm_deny: set[str] | None = None,
+    max_rounds: int | None = None,
 ) -> OrchestratorResult:
    """
    Run the full orchestration loop for a task.
@@ -173,6 +175,7 @@ async def run(
        confirm_deny=_confirm_deny,
        starting_round=0,
        gemini_api_key=api_key,
+        max_rounds=max_rounds,
    )

    if checkpoint:
@@ -248,6 +251,7 @@ async def resume(checkpoint: OrchestrateCheckpoint, confirmed: bool) -> Orchestr
        confirm_deny=checkpoint.confirm_deny,
        starting_round=checkpoint.rounds_used,
        gemini_api_key=api_key,
+        max_rounds=checkpoint.max_rounds,
    )

    if new_checkpoint:
@@ -289,14 +293,17 @@ async def _run_from_contents(
    starting_round: int = 0,
    gemini_api_key: str | None = None,
    tool_list: list[str] | None = None,
+    max_rounds: int | None = None,
 ) -> tuple[str, OrchestrateCheckpoint | None]:
    """
    Run the ReAct loop from the current contents state.
    Returns (gemini_summary, checkpoint) — checkpoint is set if confirmation is needed.
    """
    gemini_summary = ""
+    per_model_limit = max_rounds or settings.orchestrator_max_rounds
+    effective_limit = min(per_model_limit, settings.orchestrator_max_rounds)

-    for round_num in range(starting_round, settings.orchestrator_max_rounds):
+    for round_num in range(starting_round, effective_limit):
        logger.info("Orchestrator round %d for task: %.80s", round_num + 1, task)

        response = await asyncio.to_thread(
@@ -400,15 +407,16 @@ async def _run_from_contents(
                confirm_allow=confirm_allow,
                confirm_deny=confirm_deny,
                rounds_used=round_num + 2,
+                max_rounds=max_rounds,
            )
            return gemini_summary, checkpoint

        contents.append(types.Content(role="user", parts=response_parts))

    else:
-        logger.warning("Orchestrator hit max rounds (%d)", settings.orchestrator_max_rounds)
+        logger.warning("Orchestrator hit max rounds (%d)", effective_limit)
        gemini_summary = (
-            f"Reached the tool iteration limit ({settings.orchestrator_max_rounds} rounds). "
+            f"Reached the tool iteration limit ({effective_limit} rounds). "
            "Here is what was gathered so far:\n\n"
            + "\n\n".join(f"**{t['tool']}**: {t['result'][:500]}" for t in tool_call_log)
        )