feat: token streaming for orchestrator final response
Switches the orchestrator's final response from a submit-then-poll model to a
live SSE stream, so text appears token-by-token as the model generates it.
- llm_client: complete() gains token_sink param; anthropic_api backend uses
client.messages.stream(); local backend uses httpx SSE streaming; non-streaming
backends (claude_cli, gemini_cli) emit the full text as one chunk
- orchestrator_engine + openai_orchestrator: token_sink threaded through run(),
_run_from_contents(), _claude_handoff(), and _run_from_messages()
- routers/orchestrator: each job gets an asyncio.Queue; _on_progress and
_token_sink write progress/token events to it; _finalize_job emits done,
error handler emits error, confirmation gate emits confirm; new GET
/orchestrate/{job_id}/stream SSE endpoint with 20s keepalive
- app.js: _doOrchestrate switches from 2s poll loop to EventSource; thinking
bubble converts to a streaming message on first token; auto-scroll while
streaming; confirm/error/done events handled; finalization unchanged
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -121,6 +121,7 @@ async def run(
|
||||
risk_whitelist: list[str] | None = None,
|
||||
risk_blacklist: list[str] | None = None,
|
||||
on_progress=None, # async (str) -> None; called with live status updates
|
||||
token_sink=None, # async (str) -> None; called with each response token
|
||||
) -> OrchestratorResult:
|
||||
"""
|
||||
Run the full orchestration loop for a task.
|
||||
@@ -185,6 +186,7 @@ async def run(
|
||||
gemini_api_key=api_key,
|
||||
max_rounds=max_rounds,
|
||||
on_progress=on_progress,
|
||||
token_sink=token_sink,
|
||||
)
|
||||
|
||||
if checkpoint:
|
||||
@@ -207,6 +209,7 @@ async def run(
|
||||
session_messages=session_messages,
|
||||
respond_with_claude=respond_with_claude,
|
||||
response_role=response_role,
|
||||
token_sink=token_sink,
|
||||
)
|
||||
|
||||
|
||||
@@ -270,6 +273,8 @@ async def resume(checkpoint: OrchestrateCheckpoint, confirmed: bool) -> Orchestr
|
||||
gemini_api_key=api_key,
|
||||
max_rounds=checkpoint.max_rounds,
|
||||
)
|
||||
# Note: resume() does not accept a token_sink — the SSE stream endpoint has already
|
||||
# closed by the time a resumed job's final response is ready, so the polling fallback applies.
|
||||
|
||||
if new_checkpoint:
|
||||
return OrchestratorResult(
|
||||
@@ -312,6 +317,7 @@ async def _run_from_contents(
|
||||
tool_list: list[str] | None = None,
|
||||
max_rounds: int | None = None,
|
||||
on_progress=None,
|
||||
token_sink=None,
|
||||
) -> tuple[str, OrchestrateCheckpoint | None]:
|
||||
"""
|
||||
Run the ReAct loop from the current contents state.
|
||||
@@ -454,6 +460,7 @@ async def _claude_handoff(
|
||||
session_messages: list[dict] | None,
|
||||
respond_with_claude: bool,
|
||||
response_role: str,
|
||||
token_sink=None,
|
||||
) -> OrchestratorResult:
|
||||
if respond_with_claude:
|
||||
claude_prompt = _build_claude_prompt(task, tool_call_log, gemini_summary)
|
||||
@@ -463,10 +470,13 @@ async def _claude_handoff(
|
||||
system_prompt=system_prompt,
|
||||
messages=messages,
|
||||
role=response_role,
|
||||
token_sink=token_sink,
|
||||
)
|
||||
else:
|
||||
response_text = gemini_summary or "No information gathered."
|
||||
backend = "gemini"
|
||||
if token_sink and response_text:
|
||||
await token_sink(response_text)
|
||||
|
||||
return OrchestratorResult(
|
||||
response=response_text,
|
||||
|
||||
Reference in New Issue
Block a user