From f5cf1ef398dc3ce23cfcd76e688d01b53a90a3b9 Mon Sep 17 00:00:00 2001 From: Scott Idem Date: Thu, 21 May 2026 15:46:30 -0400 Subject: [PATCH] api: separate timeout abort retries from intentional aborts --- documentation/TODO__Agents.md | 39 +++++++++++++++++++++++++++++++ src/lib/ae_api/api_get_object.ts | 18 +++++++++++--- src/lib/ae_api/api_post_object.ts | 17 +++++++++++--- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/documentation/TODO__Agents.md b/documentation/TODO__Agents.md index d3b3f600..15441cdb 100644 --- a/documentation/TODO__Agents.md +++ b/documentation/TODO__Agents.md @@ -156,6 +156,45 @@ below. The TTL + `verify_in_flight` guards are the current mitigation. --- +### [API] GET/POST retry hardening — differentiate timeout aborts vs intentional aborts +**Status:** 🚧 Planned follow-up (2026-05-21) + +Recent API helper fixes restored retry/backoff for transient network `TypeError` failures. +Current remaining gap: timeout-triggered aborts are treated the same as intentional/user +aborts, so retries are skipped in both `api_get_object.ts` and `api_post_object.ts`. + +**Decision (for now):** Keep the global default timeout at **20s**. + +**What needs to be implemented:** +- Separate abort reasons in GET/POST helpers: + - **Intentional abort** (navigation/unmount/caller cancel): fail fast, no retry + - **Timeout abort** (helper's own timer): eligible for retry/backoff (same class as transient network) +- Add explicit timeout classification in code (not just `AbortError` name check), so the retry + loop can make a deterministic decision. +- Keep existing capped backoff behavior (`2s -> 4s -> 6s -> 8s`) for retryable timeout/network failures. + +**Timeout policy improvement (class-based):** +- Keep **20s default** as baseline. +- Add request classes with explicit timeout selection at callsites/wrappers (not random per-page values): + - fast CRUD/read/search: ~20s baseline + - medium actions: higher bounded timeout + - heavy actions (uploads, exports, ffmpeg/video clip): explicit long timeout already required +- Centralize the class mapping so timeout intent is clear and audit-friendly. + +**Primary files:** +- `src/lib/ae_api/api_get_object.ts` +- `src/lib/ae_api/api_post_object.ts` +- Wrapper callsites in `src/lib/ae_api/` and legacy bridge points in `src/lib/api/api.ts` + +**Acceptance criteria:** +- Timeout-aborted requests retry according to retry_count/backoff policy. +- User/navigation aborts still fail fast with no retry. +- No regression on 400/401/403/422 fail-fast handling. +- Existing long-running flows that already set explicit timeouts (uploads/video tools/exports) + continue to function without behavior regressions. + +--- + ### [Launcher/VLC] Linux playback — fullscreen + pause-on-end not working **Status:** Mac ✅ working perfectly; Linux 🚧 deferred for later investigation **Date discovered:** 2026-05-20 diff --git a/src/lib/ae_api/api_get_object.ts b/src/lib/ae_api/api_get_object.ts index 748f88bf..6685f443 100644 --- a/src/lib/ae_api/api_get_object.ts +++ b/src/lib/ae_api/api_get_object.ts @@ -205,7 +205,12 @@ export const get_object = async function get_object({ // independent timeout. Sharing a single controller across retries leaves // retries unprotected once the first attempt's clearTimeout() runs. const controller = new AbortController(); + // Track whether THIS helper's timeout fired. AbortError alone is ambiguous: + // it can mean timeout OR intentional caller abort (navigation/unmount). + // We only retry timeout-aborts; intentional aborts should fail fast. + let did_timeout_abort = false; const timeoutId = setTimeout(() => { + did_timeout_abort = true; console.warn(`API GET: Request timed out after ${timeout}ms (attempt ${attempt}/${retry_count}).`); controller.abort(); }, timeout); @@ -245,9 +250,16 @@ export const get_object = async function get_object({ (response.name === 'TypeError' || response.name === 'AbortError')) ) { - // AbortError = intentional cancel: our own timeout fired, or the caller - // aborted (e.g. component unmounted, user navigated away). Never retry. - if (response.name === 'AbortError') return false; + // AbortError can be either timeout or intentional abort. + // Retry only helper-owned timeout aborts; fail fast on caller abort. + if (response.name === 'AbortError') { + if (did_timeout_abort) { + throw new Error( + `Timeout abort (attempt ${attempt}/${retry_count}) after ${timeout}ms` + ); + } + return false; + } // TypeError = transient network failure (ERR_NETWORK_CHANGED, // ERR_NETWORK_IO_SUSPENDED, hotel/conference WiFi blip, etc.). diff --git a/src/lib/ae_api/api_post_object.ts b/src/lib/ae_api/api_post_object.ts index cf976601..686d340f 100644 --- a/src/lib/ae_api/api_post_object.ts +++ b/src/lib/ae_api/api_post_object.ts @@ -203,7 +203,11 @@ export const post_object = async function post_object({ // Declared at loop scope (not inside try) so the catch block can clearTimeout. // Fresh controller per attempt — same rationale as api_get_object.ts. const controller = new AbortController(); + // AbortError is not specific enough by itself. Distinguish timeout-aborts + // (retryable transient class) from intentional caller aborts (fail-fast). + let did_timeout_abort = false; const timeoutId = setTimeout(() => { + did_timeout_abort = true; console.warn(`API POST: Request timed out after ${timeout}ms (attempt ${attempt}/${retry_count}).`); controller.abort(); }, timeout); @@ -254,9 +258,16 @@ export const post_object = async function post_object({ (response.name === 'TypeError' || response.name === 'AbortError')) ) { - // AbortError = intentional cancel (timeout fired, or caller aborted). - // Never retry — the abort was deliberate. - if (response.name === 'AbortError') return false; + // Retry timeout-aborts from this helper; do not retry caller aborts + // (route change/unmount/manual cancellation). + if (response.name === 'AbortError') { + if (did_timeout_abort) { + throw new Error( + `Timeout abort (attempt ${attempt}/${retry_count}) after ${timeout}ms` + ); + } + return false; + } // TypeError = transient network failure. Throw into the retry loop // so backoff-and-retry applies. Same fix as api_get_object.ts — see