# Cortex-Inara/cortex/routers/orchestrator.py

"""
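Orchestrator router — POST /orchestrate, GET /orchestrate, GET /orchestrate/{job_id},
GET /orchestrate/{job_id}/stream, POST /orchestrate/{job_id}/confirm, POST /orchestrate/{job_id}/deny

Accepts a task description, queues it as a background job that runs through the
orchestrator engine (Gemini tool loop → Claude response), and returns a job_id.
The result is then available by polling the job or by following its SSE stream.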

Designed to be triggered from:
  - The Cortex web UI (future "Agent mode" toggle)
  - Cron jobs:  curl -X POST http://localhost:8000/orchestrate -d '{"task":"..."}'
  - Webhooks:   Gitea, Aether events, etc.
"""

import asyncio
import logging
import platform
import uuid
from datetime import datetime, timezone

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from auth_utils import get_user_gemini_key, get_user_role, get_tool_policy, get_risk_policy
from config import settings
from context_loader import load_context
from persona import set_context, validate as validate_persona
import model_registry
import orchestrator_engine
import openai_orchestrator

# Module logger and the router that carries every /orchestrate endpoint below.
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/orchestrate", tags=["orchestrator"])

# ---------------------------------------------------------------------------
# In-memory job store
# ---------------------------------------------------------------------------

# job_id -> job record. Keys starting with "_" (e.g. "_user", "_off_record",
# "_event_queue") are internal and are stripped before a record is turned into
# a JobStatusResponse. Nothing in this section of the file removes entries.
_jobs: dict[str, dict] = {}
# Guards reads and writes of _jobs.
_jobs_lock = asyncio.Lock()

# Checkpoints are stored separately — they hold Python objects (types.Content, etc.)
# that can't be included in the JSON-serializable job dict.
# job_id -> checkpoint of a job that is waiting for a confirm/deny decision.
_checkpoints: dict[str, orchestrator_engine.OrchestrateCheckpoint] = {}
# Guards reads and writes of _checkpoints.
_checkpoints_lock = asyncio.Lock()


# ---------------------------------------------------------------------------
# Request / response models
# ---------------------------------------------------------------------------

class OrchestrateRequest(BaseModel):
    """Request body for POST /orchestrate.

    Only ``task`` is required; every other field has a default.
    """
    task: str                           # task description handed to the orchestrator engine
    session_id: str | None = None       # include session history in context; a new id is generated when omitted
    tier: int | None = None             # Inara context tier (default from settings)
    respond_with_claude: bool = True    # False = return Gemini summary only (faster, for cron)
    include_long: bool = True           # forwarded to load_context(include_long=...)
    include_mid: bool = True            # forwarded to load_context(include_mid=...)
    include_short: bool = True          # forwarded to load_context(include_short=...)
    user: str = "scott"                 # checked together with persona by validate_persona
    persona: str = "inara"              # checked together with user by validate_persona
    chat_role: str = "chat"             # role used for the final response (decoupled from tool-loop model)
    off_record: bool = False            # skip session log; inject OTR mode line into system prompt


class OrchestrateResponse(BaseModel):
    """Acknowledgement returned by the submit, confirm and deny endpoints."""
    job_id: str     # identifier to use with GET /orchestrate/{job_id} and its sub-routes
    status: str     # "queued" | "running" | "complete" | "error" | "awaiting_confirmation"


class JobStatusResponse(BaseModel):
    """Public view of a job record.

    Built from the in-memory job dict with every key that starts with "_"
    removed, so the field names here mirror the public keys of that dict.
    """
    job_id: str
    status: str                             # same values as OrchestrateResponse.status
    task: str                               # the task text as submitted
    created_at: str                         # ISO-8601 UTC timestamp set when the job is queued
    completed_at: str | None = None         # ISO-8601 UTC timestamp set on completion or error
    session_id: str | None = None           # set once the job reaches a checkpoint or completes
    response: str | None = None             # response text from the engine result
    tool_calls: list[dict] | None = None    # tool calls reported by the engine result
    backend: str | None = None              # backend identifier reported by the engine result
    backend_label: str | None = None        # backend label reported by the engine result
    host: str | None = None                 # platform.node() of the machine that finalized the job
    gemini_summary: str | None = None       # gemini_summary reported by the engine result
    error: str | None = None                # str() of the exception when status is "error"
    pending_confirmation: dict | None = None  # {tools: [{name, args}], message: str}
    progress: str | None = None             # live status text shown in UI during run


# ---------------------------------------------------------------------------
# Endpoints
# ---------------------------------------------------------------------------

@router.post("", response_model=OrchestrateResponse)
async def orchestrate(req: OrchestrateRequest) -> OrchestrateResponse:
    """Submit a task to the orchestrator. Returns a job_id to poll.

    Validates the user/persona pair, registers a new job record with status
    "queued", and schedules ``_run_job`` as a background task. The request
    returns immediately; the job's progress is available via
    GET /orchestrate/{job_id} or GET /orchestrate/{job_id}/stream.

    Raises:
        HTTPException: 400 when ``validate_persona`` / ``set_context`` raise
            ``ValueError``.
    """
    try:
        user, persona = validate_persona(req.user, req.persona)
        set_context(user, persona)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    job_id = str(uuid.uuid4())
    now = datetime.now(timezone.utc).isoformat()

    # Keys starting with "_" are internal and never exposed in API responses.
    job: dict = {
        "job_id": job_id,
        "status": "queued",
        "task": req.task,
        "created_at": now,
        "completed_at": None,
        "session_id": None,
        "response": None,
        "tool_calls": None,
        "backend": None,
        "gemini_summary": None,
        "error": None,
        "pending_confirmation": None,
        "progress": None,
        "_user": user,
        "_off_record": req.off_record,
        "_event_queue": asyncio.Queue(),
    }

    async with _jobs_lock:
        _jobs[job_id] = job

    # The event loop only keeps weak references to tasks, so a fire-and-forget
    # task can be garbage-collected before it finishes. Keeping the task on the
    # job record (under a private key) gives it a strong reference.
    job["_task"] = asyncio.create_task(_run_job(job_id, req, user))
    logger.info("Orchestrator job queued: %s — %.80s", job_id, req.task)
    return OrchestrateResponse(job_id=job_id, status="queued")


@router.get("/{job_id}", response_model=JobStatusResponse)
async def job_status(job_id: str) -> JobStatusResponse:
    """Return the current public state of a single orchestrator job.

    Raises:
        HTTPException: 404 when no job with ``job_id`` is registered.
    """
    async with _jobs_lock:
        record = _jobs.get(job_id)

    if record is None:
        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")

    # Copy across everything except the internal, underscore-prefixed keys.
    public_fields = {}
    for key, value in record.items():
        if key.startswith("_"):
            continue
        public_fields[key] = value
    return JobStatusResponse(**public_fields)


@router.get("", response_model=list[JobStatusResponse])
async def list_jobs() -> list[JobStatusResponse]:
    """Return every known job, newest first (ordered by ``created_at``). Handy for debugging."""
    async with _jobs_lock:
        records = list(_jobs.values())
        records.sort(key=lambda rec: rec["created_at"], reverse=True)

    listing = []
    for rec in records:
        # Internal, underscore-prefixed keys are not part of the response model.
        visible = {name: val for name, val in rec.items() if not name.startswith("_")}
        listing.append(JobStatusResponse(**visible))
    return listing


@router.get("/{job_id}/stream")
async def stream_job(job_id: str, request: Request) -> StreamingResponse:
    """SSE stream for a running job — emits progress, token, done, error, and confirm events.

    If the job has already finished, a single ``done`` or ``error`` event is
    replayed from the stored job record and the stream ends. Otherwise the
    stream starts with a ``connected`` event, relays events from the job's
    queue, and sends a ``keepalive`` whenever 20 seconds pass without one. It
    ends after a ``done`` or ``error`` event or when the client disconnects.

    Raises:
        HTTPException: 404 when no job with ``job_id`` is registered.
    """
    import json

    async with _jobs_lock:
        job = _jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")

    def _sse(event: dict) -> str:
        # One server-sent-event frame carrying the JSON-encoded event.
        return f"data: {json.dumps(event)}\n\n"

    # If already complete/error, emit a single done/error event immediately.
    if job["status"] == "complete":
        # The job record stores these keys with value None until they are set,
        # so a `.get(key, "")` default would never apply; use `or` so the
        # replayed event matches the live "done" event's empty-string fallback.
        done_event = {
            "type": "done",
            "response": job["response"],
            "session_id": job.get("session_id"),
            "backend": job.get("backend") or "",
            "backend_label": job.get("backend_label") or "",
            "host": job.get("host") or "",
            "tool_calls": job.get("tool_calls"),
        }

        async def _done_now():
            yield _sse(done_event)
        return StreamingResponse(_done_now(), media_type="text/event-stream")
    if job["status"] == "error":
        # Same reasoning: "error" is always present, so fall back on falsy values.
        error_event = {"type": "error", "message": job.get("error") or "Unknown error"}

        async def _err_now():
            yield _sse(error_event)
        return StreamingResponse(_err_now(), media_type="text/event-stream")

    queue: asyncio.Queue = job["_event_queue"]

    async def generate():
        yield 'data: {"type":"connected"}\n\n'
        while True:
            if await request.is_disconnected():
                break
            try:
                event = await asyncio.wait_for(queue.get(), timeout=20)
            except asyncio.TimeoutError:
                yield 'data: {"type":"keepalive"}\n\n'
                continue
            yield _sse(event)
            if event["type"] in ("done", "error"):
                break
            # For confirm events: keep listening — job will resume after user action.

    return StreamingResponse(generate(), media_type="text/event-stream")


@router.post("/{job_id}/confirm", response_model=OrchestrateResponse)
async def confirm_job(job_id: str) -> OrchestrateResponse:
    """Confirm a pending tool call — the blocked tool will execute and the job continues.

    The stored checkpoint is only consumed once the job is verified to be in
    the "awaiting_confirmation" state, so a rejected request leaves the
    checkpoint in place for a later confirm/deny.

    Raises:
        HTTPException: 404 when no checkpoint is stored for ``job_id``;
            409 when the job is missing or not awaiting confirmation.
    """
    # Lock order is jobs -> checkpoints. Validation and the pop happen under
    # both locks so the checkpoint cannot be lost on the 409 path.
    async with _jobs_lock, _checkpoints_lock:
        if job_id not in _checkpoints:
            raise HTTPException(status_code=404, detail="No pending confirmation for this job")

        job = _jobs.get(job_id)
        if not job or job["status"] != "awaiting_confirmation":
            raise HTTPException(status_code=409, detail="Job is not awaiting confirmation")

        checkpoint = _checkpoints.pop(job_id)
        job["status"] = "running"
        job["pending_confirmation"] = None
        user = job.get("_user", "scott")

        # Keep a strong reference to the background task on the job record;
        # the event loop itself only holds weak references to tasks.
        job["_task"] = asyncio.create_task(
            _resume_job(job_id, checkpoint, confirmed=True, user=user)
        )

    logger.info("Orchestrator job %s confirmed — resuming", job_id)
    return OrchestrateResponse(job_id=job_id, status="running")


@router.post("/{job_id}/deny", response_model=OrchestrateResponse)
async def deny_job(job_id: str) -> OrchestrateResponse:
    """Deny a pending tool call — the tool is skipped and the job produces a final response.

    The stored checkpoint is only consumed once the job is verified to be in
    the "awaiting_confirmation" state, so a rejected request leaves the
    checkpoint in place for a later confirm/deny.

    Raises:
        HTTPException: 404 when no checkpoint is stored for ``job_id``;
            409 when the job is missing or not awaiting confirmation.
    """
    # Lock order is jobs -> checkpoints. Validation and the pop happen under
    # both locks so the checkpoint cannot be lost on the 409 path.
    async with _jobs_lock, _checkpoints_lock:
        if job_id not in _checkpoints:
            raise HTTPException(status_code=404, detail="No pending confirmation for this job")

        job = _jobs.get(job_id)
        if not job or job["status"] != "awaiting_confirmation":
            raise HTTPException(status_code=409, detail="Job is not awaiting confirmation")

        checkpoint = _checkpoints.pop(job_id)
        job["status"] = "running"
        job["pending_confirmation"] = None
        user = job.get("_user", "scott")

        # Keep a strong reference to the background task on the job record;
        # the event loop itself only holds weak references to tasks.
        job["_task"] = asyncio.create_task(
            _resume_job(job_id, checkpoint, confirmed=False, user=user)
        )

    logger.info("Orchestrator job %s denied — resuming with skip", job_id)
    return OrchestrateResponse(job_id=job_id, status="running")


# ---------------------------------------------------------------------------
# Background runners
# ---------------------------------------------------------------------------

async def _run_job(job_id: str, req: OrchestrateRequest, user: str) -> None:
    """Run a freshly queued job in the background and record its outcome.

    Marks the job "running", builds the system prompt and loads the session
    history, then dispatches to ``openai_orchestrator.run`` when the
    orchestrator model's type is "local_openai" and to
    ``orchestrator_engine.run`` otherwise. A result carrying a checkpoint parks
    the job in "awaiting_confirmation" and emits a ``confirm`` event; any other
    result is handed to ``_finalize_job``. Exceptions are logged, stored on the
    job as status "error", and emitted as an ``error`` event.
    """
    async with _jobs_lock:
        _jobs[job_id]["status"] = "running"

    async def _on_progress(msg: str) -> None:
        # Store the latest progress text on the job and forward it to listeners.
        async with _jobs_lock:
            if job_id not in _jobs:
                return
            entry = _jobs[job_id]
            entry["progress"] = msg
            events = entry.get("_event_queue")
        if events:
            await events.put({"type": "progress", "text": msg})

    async def _token_sink(text: str) -> None:
        # Forward a streamed response token to listeners.
        async with _jobs_lock:
            events = _jobs.get(job_id, {}).get("_event_queue")
        if events:
            await events.put({"type": "token", "text": text})

    try:
        from session_store import load as load_session, save as save_session, generate_session_id

        # --- Prompt and session context -----------------------------------
        tier = req.tier or settings.default_tier
        role_cfg = model_registry.get_role_config(user, req.chat_role)
        context_mode = "otr" if req.off_record else "chat"
        system_prompt = load_context(
            tier,
            include_long=req.include_long,
            include_mid=req.include_mid,
            include_short=req.include_short,
            role_append=role_cfg.get("system_append", ""),
            inject_datetime=role_cfg.get("inject_datetime", True),
            inject_mode=role_cfg.get("inject_mode", True),
            mode=context_mode,
        )

        if req.session_id:
            session_id = req.session_id
        else:
            session_id = generate_session_id()
        history = load_session(session_id)
        session_messages = history if history else None

        # --- Model, tools and policies ------------------------------------
        orch_model = model_registry.get_model_for_role(user, "orchestrator")
        user_role = get_user_role(user)
        tool_list = role_cfg.get("tools")

        policy = get_tool_policy(user)
        confirm_allow = set(policy.get("allow", []))
        confirm_deny = set(policy.get("deny", []))
        max_risk, risk_wl, risk_bl = get_risk_policy(user)

        # Keyword arguments that both engines accept under the same names.
        engine_kwargs = {
            "task": req.task,
            "system_prompt": system_prompt,
            "session_messages": session_messages,
            "user_role": user_role,
            "tool_list": tool_list,
            "confirm_allow": confirm_allow,
            "confirm_deny": confirm_deny,
            "max_risk": max_risk,
            "risk_whitelist": risk_wl,
            "risk_blacklist": risk_bl,
            "on_progress": _on_progress,
            "token_sink": _token_sink,
        }

        # --- Engine dispatch ----------------------------------------------
        if orch_model and orch_model.get("type") == "local_openai":
            result = await openai_orchestrator.run(
                model_cfg=orch_model,
                respond_with_final=req.respond_with_claude,
                **engine_kwargs,
            )
        else:
            if orch_model:
                # A key on the model config wins over the user's own key.
                gemini_key = orch_model.get("api_key") or get_user_gemini_key(user)
                model_name = orch_model.get("model_name")
                max_rounds = orch_model.get("max_rounds")
            else:
                gemini_key = get_user_gemini_key(user)
                model_name = None
                max_rounds = None
            result = await orchestrator_engine.run(
                respond_with_claude=req.respond_with_claude,
                gemini_api_key=gemini_key,
                model_name=model_name,
                response_role=req.chat_role,
                max_rounds=max_rounds,
                **engine_kwargs,
            )

        # --- Outcome -------------------------------------------------------
        checkpoint = result.checkpoint
        if not checkpoint:
            await _finalize_job(job_id, result, session_id, req.task, history, off_record=req.off_record)
            return

        # The engine stopped at a gated tool call: park the job until the
        # confirm/deny endpoint picks the checkpoint up again.
        blocked_tools = checkpoint.pending_tools
        async with _checkpoints_lock:
            _checkpoints[job_id] = checkpoint
        async with _jobs_lock:
            entry = _jobs[job_id]
            entry.update({
                "status": "awaiting_confirmation",
                "response": result.response,
                "tool_calls": result.tool_calls,
                "backend": result.backend,
                "gemini_summary": result.gemini_summary,
                "session_id": session_id,
                "pending_confirmation": {
                    "tools": blocked_tools,
                    "message": result.response,
                },
            })
            events = entry.get("_event_queue")
        logger.info("Orchestrator job %s awaiting confirmation — %d tool(s) blocked",
                    job_id, len(blocked_tools))
        if events:
            await events.put({
                "type": "confirm",
                "tools": blocked_tools,
                "message": result.response,
            })

    except Exception as e:
        logger.exception("Orchestrator job failed: %s", job_id)
        failed_at = datetime.now(timezone.utc).isoformat()
        async with _jobs_lock:
            entry = _jobs[job_id]
            entry.update({
                "status": "error",
                "completed_at": failed_at,
                "error": str(e),
            })
            events = entry.get("_event_queue")
        if events:
            await events.put({"type": "error", "message": str(e)})

async def _resume_job(
    job_id: str,
    checkpoint: orchestrator_engine.OrchestrateCheckpoint,
    confirmed: bool,
    user: str,
) -> None:
    """Resume a job after the user confirms or denies a pending tool call.

    Dispatches to ``orchestrator_engine.resume`` when ``checkpoint.engine`` is
    "gemini" and to ``openai_orchestrator.resume`` otherwise. If the resumed
    run stops at another gated tool call, the job goes back to
    "awaiting_confirmation" and a ``confirm`` event is emitted; otherwise the
    result is handed to ``_finalize_job``. Exceptions are logged, stored on the
    job as status "error", and emitted as an ``error`` event.

    Args:
        job_id: Job being resumed.
        checkpoint: Checkpoint taken from the pending-confirmation store.
        confirmed: True to run the blocked tool(s), False to skip them.
        user: User that owns the job (not used by the current implementation).
    """
    try:
        if checkpoint.engine == "gemini":
            result = await orchestrator_engine.resume(checkpoint, confirmed)
        else:
            result = await openai_orchestrator.resume(checkpoint, confirmed)

        if result.checkpoint:
            # Another confirmation needed (chained gates)
            async with _checkpoints_lock:
                _checkpoints[job_id] = result.checkpoint
            async with _jobs_lock:
                _jobs[job_id].update({
                    "status": "awaiting_confirmation",
                    "response": result.response,
                    "tool_calls": result.tool_calls,
                    "backend": result.backend,
                    "gemini_summary": result.gemini_summary,
                    "pending_confirmation": {
                        "tools": result.checkpoint.pending_tools,
                        "message": result.response,
                    },
                })
                q = _jobs[job_id].get("_event_queue")
            logger.info("Orchestrator job %s awaiting another confirmation", job_id)
            # Stream listeners stay connected after a confirm event, so they
            # must be told about the next gate just like for the first one.
            if q:
                await q.put({
                    "type": "confirm",
                    "tools": result.checkpoint.pending_tools,
                    "message": result.response,
                })
            return

        async with _jobs_lock:
            session_id = _jobs[job_id].get("session_id") or ""
            task = _jobs[job_id].get("task", "")
            off_record = _jobs[job_id].get("_off_record", False)

        from session_store import load as load_session
        history = load_session(session_id) if session_id else []
        await _finalize_job(job_id, result, session_id, task, history, off_record=off_record)

    except Exception as e:
        logger.exception("Orchestrator resume failed: %s", job_id)
        now = datetime.now(timezone.utc).isoformat()
        async with _jobs_lock:
            _jobs[job_id].update({
                "status": "error",
                "completed_at": now,
                "error": str(e),
            })
            q = _jobs[job_id].get("_event_queue")
        # Without this event a stream listener would only ever see keepalives
        # after a failed resume.
        if q:
            await q.put({"type": "error", "message": str(e)})


async def _finalize_job(
    job_id: str,
    result: orchestrator_engine.OrchestratorResult,
    session_id: str,
    task: str,
    history: list,
    off_record: bool = False,
) -> None:
    """Persist the finished turn and flip the job to "complete".

    Appends the user task and the assistant response to ``history`` (in
    place), saves the session, logs the turn unless ``off_record`` is set,
    updates the job record, and emits a ``done`` event on the job's queue.
    A session id is generated when ``session_id`` is empty.
    """
    from session_store import save as save_session, generate_session_id
    from session_logger import log_turn

    session_id = session_id or generate_session_id()

    host = platform.node()
    reply = result.response

    # Both turns carry the off_record flag alongside their content.
    user_turn = {"role": "user", "content": task, "off_record": off_record}
    assistant_turn = {
        "role": "assistant",
        "content": reply,
        "backend": result.backend,
        "backend_label": result.backend_label,
        "host": host,
        "off_record": off_record,
    }
    history.extend((user_turn, assistant_turn))
    save_session(session_id, history)

    # Off-the-record turns are saved with the session but kept out of the log.
    if not off_record:
        log_turn(session_id, task, reply)

    finished_at = datetime.now(timezone.utc).isoformat()
    completion = {
        "status": "complete",
        "completed_at": finished_at,
        "session_id": session_id,
        "response": reply,
        "tool_calls": result.tool_calls,
        "backend": result.backend,
        "backend_label": result.backend_label,
        "host": host,
        "gemini_summary": result.gemini_summary,
    }
    async with _jobs_lock:
        entry = _jobs[job_id]
        entry.update(completion)
        events = entry.get("_event_queue")

    logger.info("Orchestrator job complete: %s (%d tool calls)", job_id, len(result.tool_calls))
    if events:
        done_event = {
            "type": "done",
            "response": reply,
            "session_id": session_id,
            "backend": result.backend,
            "backend_label": result.backend_label or "",
            "host": host,
            "tool_calls": result.tool_calls,
        }
        await events.put(done_event)