From 27ca7c7efdc86b9f5bda239142247ea497e758b1 Mon Sep 17 00:00:00 2001
From: Scott Idem <stidem@gmail.com>
Date: Wed, 8 Apr 2026 23:16:33 -0400
Subject: [PATCH] fix: apply host_type path correction in OpenAI orchestrator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The AsyncOpenAI client always appends /chat/completions to base_url.
Open WebUI's endpoint is at /api/chat/completions, so the orchestrator
now strips any trailing slash from api_url and, when host_type is
"openwebui" (the default if model_cfg does not set host_type), appends
the /api prefix — same logic as _local() in llm_client.py.

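Also strip non-standard metadata fields (backend, host, etc.) from
session_messages before passing them to the API: only the role and
content keys of the last six messages are forwarded, so any other
key on a session message is dropped.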

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 cortex/openai_orchestrator.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/cortex/openai_orchestrator.py b/cortex/openai_orchestrator.py
index 24c3280..0041634 100644
--- a/cortex/openai_orchestrator.py
+++ b/cortex/openai_orchestrator.py
@@ -62,24 +62,35 @@ async def run(
     if not model_cfg:
         raise RuntimeError("model_cfg is required for the OpenAI orchestrator")
 
-    api_url   = model_cfg.get("api_url", "")
-    api_key   = model_cfg.get("api_key", "") or "none"
+    api_url    = model_cfg.get("api_url", "")
+    api_key    = model_cfg.get("api_key", "") or "none"
     model_name = model_cfg.get("model_name", "")
+    host_type  = model_cfg.get("host_type", "openwebui")
 
     if not api_url or not model_name:
         raise RuntimeError(
             f"model_cfg missing api_url or model_name: {model_cfg.get('label', model_cfg)}"
         )
 
-    client = AsyncOpenAI(base_url=api_url, api_key=api_key)
+    # Open WebUI's OpenAI-compatible endpoint lives at /api/chat/completions,
+    # so the SDK base_url needs the /api prefix; standard OpenAI-layout hosts don't.
+    base_url = api_url.rstrip("/")
+    if host_type == "openwebui":
+        base_url = base_url + "/api"
+
+    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
 
     # System prompt: persona context + brief tool instruction
     sys_content = (system_prompt or "") + _TOOL_INSTRUCTION
 
     # Build messages: [system, ...recent_session, current_task]
+    # Strip non-standard metadata fields (backend, host, etc.) before sending.
     messages: list[dict] = [{"role": "system", "content": sys_content}]
     if session_messages:
-        messages.extend(session_messages[-6:])   # last 3 turns for context
+        messages.extend(
+            {"role": m["role"], "content": m["content"]}
+            for m in session_messages[-6:]
+        )
     messages.append({"role": "user", "content": task})
 
     tool_call_log: list[dict] = []