diff --git a/cortex/llm_client.py b/cortex/llm_client.py
index a67f9df..b810912 100644
--- a/cortex/llm_client.py
+++ b/cortex/llm_client.py
@@ -87,9 +87,15 @@ async def complete(
         return response, primary
     except Exception as e:
         err_str = str(e)
-        logger.warning("%s failed (%s) — falling back to %s", primary, e, fallback)
         if primary == "claude" and any(k in err_str for k in ("401", "authenticate", "expired", "OAuth")):
             await event_bus.publish({"type": "claude_auth_expired"})
+        # Only fall back when using a default/auto backend.
+        # If the user has explicitly configured a model via the registry,
+        # surface the error so they know something is wrong.
+        if resolved_cfg is not None:
+            logger.error("%s failed (no fallback — model explicitly configured): %s", primary, e)
+            raise
+        logger.warning("%s failed (%s) — falling back to %s", primary, e, fallback)
         response = await _dispatch(fallback, system_prompt, messages, None)
         return response, fallback
 