diff --git a/cortex/model_registry.py b/cortex/model_registry.py index d2d3e12..82c71f1 100644 --- a/cortex/model_registry.py +++ b/cortex/model_registry.py @@ -664,7 +664,8 @@ def save_model(username: str, model_id: str | None, host_id: str, label: str, model_name: str, context_k: int = 0, tags: list[str] | None = None, max_rounds: int | None = None, - tools: bool = True) -> str: + tools: bool = True, + reasoning_budget_tokens: int | None = None) -> str: """Create or update a local_openai model entry. Returns the model ID.""" data = _load(username) tags = tags or [] @@ -672,29 +673,31 @@ def save_model(username: str, model_id: str | None, host_id: str, if model_id: for m in data["models"]: if m["id"] == model_id: - m["host_id"] = host_id - m["label"] = label.strip() or model_name.strip() - m["model_name"] = model_name.strip() - m["context_k"] = context_k - m["max_rounds"] = max_rounds - m["tools"] = tools - m["tags"] = tags + m["host_id"] = host_id + m["label"] = label.strip() or model_name.strip() + m["model_name"] = model_name.strip() + m["context_k"] = context_k + m["max_rounds"] = max_rounds + m["tools"] = tools + m["tags"] = tags + m["reasoning_budget_tokens"] = reasoning_budget_tokens _save(username, data) return model_id model_id = None model_id = secrets.token_hex(4) data["models"].append({ - "id": model_id, - "type": "local_openai", - "label": label.strip() or model_name.strip(), - "model_name": model_name.strip(), - "provider": "local", - "host_id": host_id, - "context_k": context_k, - "max_rounds": max_rounds, - "tools": tools, - "tags": tags, + "id": model_id, + "type": "local_openai", + "label": label.strip() or model_name.strip(), + "model_name": model_name.strip(), + "provider": "local", + "host_id": host_id, + "context_k": context_k, + "max_rounds": max_rounds, + "tools": tools, + "tags": tags, + "reasoning_budget_tokens": reasoning_budget_tokens, }) _save(username, data) return model_id diff --git a/cortex/openai_orchestrator.py b/cortex/openai_orchestrator.py index d8aba00..fc37da4 100644 --- a/cortex/openai_orchestrator.py +++ b/cortex/openai_orchestrator.py @@ -287,6 +287,9 @@ async def _run_from_messages( if active_tools: call_kwargs["tools"] = active_tools call_kwargs["tool_choice"] = "auto" + reasoning_budget = (model_cfg or {}).get("reasoning_budget_tokens") + if reasoning_budget: + call_kwargs["extra_body"] = {"reasoning": {"budget_tokens": reasoning_budget}} response = await _chat_with_retry(client, **call_kwargs) choice = response.choices[0] @@ -346,6 +349,8 @@ async def _run_from_messages( conf_call: dict = {"model": model_name, "messages": messages, "tool_choice": "none"} if active_tools: conf_call["tools"] = active_tools + if reasoning_budget: + conf_call["extra_body"] = {"reasoning": {"budget_tokens": reasoning_budget}} conf_resp = await _chat_with_retry(client, **conf_call) final_response = conf_resp.choices[0].message.content or ( "This action requires your explicit confirmation before it can proceed." diff --git a/cortex/routers/local_llm.py b/cortex/routers/local_llm.py index 5d63538..3d853e0 100644 --- a/cortex/routers/local_llm.py +++ b/cortex/routers/local_llm.py @@ -204,12 +204,13 @@ def _render(username: str, success: str = "", error: str = "") -> str: else: extra_fields = '' - cur_label = m.get("label", "") - cur_model_name = m.get("model_name", "") - cur_ctx = m.get("context_k", 0) or 0 - cur_max_rounds = m.get("max_rounds") or 0 - cur_tools = m.get("tools", True) - cur_tags = ", ".join(m.get("tags") or []) + cur_label = m.get("label", "") + cur_model_name = m.get("model_name", "") + cur_ctx = m.get("context_k", 0) or 0 + cur_max_rounds = m.get("max_rounds") or 0 + cur_tools = m.get("tools", True) + cur_tags = ", ".join(m.get("tags") or []) + cur_reasoning_budget = m.get("reasoning_budget_tokens") or 0 model_rows += f'''
@@ -256,6 +257,11 @@ def _render(username: str, success: str = "", error: str = "") -> str:
+
+ + +