diff --git a/app/lib_api_crud_v3.py b/app/lib_api_crud_v3.py index 61813f4..868f55b 100644 --- a/app/lib_api_crud_v3.py +++ b/app/lib_api_crud_v3.py @@ -140,30 +140,37 @@ def apply_forced_account_filter(and_qry_dict: Optional[Dict], account: AccountCo return forced -def filter_order_by(order_by_li: Any, model: Any, table_name: str = None) -> Optional[Dict[str, str]]: +def filter_order_by(order_by_li: Any, model: Any, table_name: str = None, raw_table_name: str = None) -> Optional[Dict[str, str]]: """ Sanitize Sorting Parameters. - + Prevents SQL injection and logic errors by validating that requested sort columns actually exist in the Pydantic model and/or the database table. + + When raw_table_name is provided (the physical table, not the view), columns are + validated against it instead of table_name. This prevents view-only join columns + (e.g. event_presenter_family_name on v_event_file) from reaching ORDER BY — + those columns cause MariaDB to expand the view's JOIN inline, making unqualified + WHERE references like `account_id` ambiguous across joined tables. """ if not order_by_li or not isinstance(order_by_li, dict) or not model: return order_by_li if not hasattr(model, '__fields__'): return order_by_li - + model_fields = set(model.__fields__.keys()) model_fields.update({f.alias for f in model.__fields__.values() if f.alias}) filtered = {k: v for k, v in order_by_li.items() if k in model_fields} - - if table_name and filtered: - from app.db_sql import db + + check_table = raw_table_name or table_name + if check_table and filtered: + from app import lib_sql_core from sqlalchemy import text final_filtered = {} for column in filtered: try: - # Lightweight check to see if column exists in SQL - db.execute(text(f"SELECT `{column}` FROM `{table_name}` LIMIT 0")) + with lib_sql_core.engine.connect() as conn: + conn.execute(text(f"SELECT `{column}` FROM `{check_table}` LIMIT 0")) final_filtered[column] = filtered[column] except Exception: pass diff --git a/app/routers/api_crud_v3.py b/app/routers/api_crud_v3.py index c39824c..4aad9d8 100644 --- a/app/routers/api_crud_v3.py +++ b/app/routers/api_crud_v3.py @@ -226,11 +226,12 @@ async def get_obj_li( table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) + raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl')) if not table_name or not base_name: return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") - order_by_li = filter_order_by(order_by_li, base_name, table_name) + order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name) status_filter = get_supported_filters(base_name, status_filter) if not obj_cfg.get('public_read', False): @@ -335,11 +336,12 @@ async def search_obj_li( table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) + raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl')) if not table_name or not base_name: return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") - order_by_li = filter_order_by(order_by_li, base_name, table_name) + order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name) status_filter = get_supported_filters(base_name, status_filter) searchable_fields = obj_cfg.get('searchable_fields') diff --git a/app/routers/api_crud_v3_nested.py b/app/routers/api_crud_v3_nested.py index 2c97633..9734cef 100644 --- a/app/routers/api_crud_v3_nested.py +++ b/app/routers/api_crud_v3_nested.py @@ -84,6 +84,7 @@ async def get_child_obj_li( obj_cfg = obj_type_kv_li[obj_name] table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) + raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl')) # Log parent/child resolution details (use INFO so logs appear in production) log.info("nested.list start parent=%s parent_table=%s parent_id_random=%s child=%s table=%s allowed_parents=%s", parent_obj_type, parent_table, parent_obj_id, obj_name, table_name, obj_cfg.get('parent_types')) @@ -91,7 +92,7 @@ async def get_child_obj_li( if not table_name or not base_name: return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") - order_by_li = filter_order_by(order_by_li, base_name, table_name) + order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name) status_filter = get_supported_filters(base_name, status_filter) resolved_parent_id = redis_lookup_id_random(record_id_random=parent_obj_id, table_name=parent_table) @@ -187,11 +188,12 @@ async def search_child_obj_li( obj_cfg = obj_type_kv_li[obj_name] table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) + raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl')) if not table_name or not base_name: return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") - order_by_li = filter_order_by(order_by_li, base_name, table_name) + order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name) status_filter = get_supported_filters(base_name, status_filter) searchable_fields = obj_cfg.get('searchable_fields') diff --git a/documentation/GUIDE__AE_API_V3_for_Frontend.md b/documentation/GUIDE__AE_API_V3_for_Frontend.md index d36bf0d..8ac942c 100644 --- a/documentation/GUIDE__AE_API_V3_for_Frontend.md +++ b/documentation/GUIDE__AE_API_V3_for_Frontend.md @@ -96,6 +96,26 @@ The primary way to retrieve data. * **Endpoint:** `POST /v3/crud/{obj_type}/search` * **Security:** Automatically filters results to only show records belonging to your `x-account-id`. If no account context is provided, it will return **0 records** for private objects. +#### Sorting with `order_by_li` + +Pass a JSON object as the `order_by_li` query parameter to sort results: + +```ts +// ?order_by_li={"filename":"ASC","created_on":"DESC"} +const params = new URLSearchParams({ + order_by_li: JSON.stringify({ filename: 'ASC', created_on: 'DESC' }) +}); +``` + +> [!IMPORTANT] +> **`order_by_li` only accepts columns from the raw base table** — not view-only join columns. +> +> Some object types (e.g. `event_file`) have enriched views that JOIN other tables to expose convenience fields like `event_presenter_family_name`. These are available in search results when using `?view=alt`, but they **cannot** be used in `order_by_li`. Attempting to sort by them silently drops those sort keys (the query proceeds without them). +> +> If you need to sort by a joined field, sort client-side on the returned list. +> +> **Columns safe to sort on for `event_file`:** any field in the `event_file` table itself — `filename`, `title`, `extension`, `created_on`, `updated_on`, `sort`, `enable`, etc. + ### C. POST Create / PATCH Update Modify data in the system. * **Endpoints:** @@ -281,6 +301,47 @@ Every Event File (`event_file`) **must** have a linked Hosted File (`hosted_file * `hosted_file_size` (string - in bytes) 2. **Nested Hosted File Object:** A full `hosted_file` object will be nested under the `hosted_file` key. This object (`Hosted_File_Base` model) will contain all its standard fields, including `id` (random string ID), `hash_sha256`, `content_type`, `size`, etc. +### Direct Download Links (Shareable / External) + +Event files can be downloaded without standard auth headers using one of two bypass mechanisms. This is useful for generating shareable links for staff or external recipients. + +- **Method:** `GET` +- **Path:** `/v3/action/event_file/{event_file_id}/download` + +The endpoint also accepts `hosted_file_id` or `archive_content_id` directly — it resolves the chain automatically. + +#### Auth bypass options + +| Query param | Value | When to use | +|---|---|---| +| `?key=` | Any valid account random ID | Staff sharing within a known account context | +| `?site_key=` | The site's `access_key` value | Public or semi-public distribution tied to a specific site | + +Either param replaces the need for `x-aether-api-key` / `x-account-id` headers, so the URL is self-contained and works in a plain browser tab or `` link. + +#### Optional params + +| Query param | Description | +|---|---| +| `filename` | Override the download filename (min 4 chars). Useful for giving files clean display names. | + +#### Building a shareable link + +```ts +// Build a self-contained download URL for staff/external use +function makeDownloadUrl(eventFileId: string, accountId: string, displayName?: string): string { + const base = `https://dev-api.oneskyit.com/v3/action/event_file/${eventFileId}/download`; + const params = new URLSearchParams({ key: accountId }); + if (displayName) params.set('filename', displayName); + return `${base}?${params}`; +} +``` + +The endpoint supports byte-range requests (`Range` header), so it works correctly for in-browser media streaming as well as direct file downloads. + +> [!NOTE] +> The `?key=` bypass verifies only that the account ID exists — it does not confirm the file belongs to that account. It is appropriate for internal staff tools. For publicly distributed links, prefer `?site_key=` which ties access to a specific site's configured key. + --- ## 6. Hosted File Actions: Convert & Clip (Frontend Notes) @@ -301,18 +362,16 @@ These helper endpoints let the frontend request small server-side transformation - Required query params: `link_to_type`, `link_to_id`, `start_time`, `end_time` (format `HH:MM:SS`) - Optional query params: `filename_no_ext` (defaults to `automated_hosted_file_clip_video`), `reencode` (bool), `scale_down` (bool) - Auth: standard V3 headers - - Behavior: extracts a clip using `ffmpeg` and saves it as a new `hosted_file`. Defaults to stream-copying to be fast; set `reencode=true` to force H.264 or `scale_down=true` to resize. Returns 400 on failure. - - Behavior: extracts a clip using `ffmpeg` and saves it as a new `hosted_file`. - - Defaults to stream-copying to be fast; set `reencode=true` to force H.264 or `scale_down=true` to resize. - - For longer-running clips you can schedule the job in the background by adding `?background=true`. When scheduled the API returns `202 Accepted` and the clip runs asynchronously on the server; check the returned `hosted_file` record later via the standard V3 `hosted_file` endpoints. - - Returns 400 on synchronous failure; returns 202 when scheduled successfully. + - Behavior: extracts a clip using `ffmpeg` and saves it as a new `hosted_file`. + - Defaults to stream-copying (fast); set `reencode=true` to force H.264 or `scale_down=true` to resize. + - Add `?background=true` to schedule the clip asynchronously — returns `202 Accepted` immediately; poll the `hosted_file` record for completion. + - Returns 400 on synchronous failure; 202 when scheduled successfully. Frontend guidance: - Call these routes with the same `link_to_type` / `link_to_id` you plan to associate the resulting hosted_file with — the server resolves random IDs for you. - After a successful response, use the V3 `hosted_file` action endpoints (download/delete) to manage or retrieve the new file. -- These endpoints run synchronously and can take time for large inputs; for heavy or batch workloads use a queued job pattern instead. - - These endpoints may take time for large inputs. Prefer using `?background=true` to schedule work and receive a `202 Accepted` response for async processing. For heavy or batch workloads use a queued job pattern instead. +- Prefer `?background=true` for large inputs to avoid request timeouts. For heavy or batch workloads use a queued job pattern instead. --- diff --git a/documentation/TODO__Agents.md b/documentation/TODO__Agents.md index 4b6c259..d922337 100644 --- a/documentation/TODO__Agents.md +++ b/documentation/TODO__Agents.md @@ -23,6 +23,7 @@ - [x] **[P4] Expose `pool_size` / `max_overflow` as env vars** — `create_ae_engine()` calls `settings.DB.get('pool_size', 10)` but `settings.DB` property doesn't include those keys, so they're always hardcoded 10/20. Add `AE_DB_POOL_SIZE` / `AE_DB_POOL_MAX_OVERFLOW` to `config.py`. ## 📋 Feature Tasks +- [x] **`order_by_li` view-join ambiguity fix (June 2026):** Using view-only join columns (e.g. `event_presenter_family_name` from `v_event_file`) in `order_by_li` caused MariaDB error "Unknown column 'account_id' in WHERE" (HTTP 400). Root cause: `filter_order_by` validated columns against the view — which passes for join-derived fields — and `sql_and_qry_part` generates an unqualified `account_id =` clause that becomes ambiguous when MariaDB expands the view's JOIN inline. Fix: `filter_order_by` now accepts `raw_table_name` and validates ORDER BY columns against the physical table only. Join-only view columns are silently stripped. Updated all three call sites in `api_crud_v3.py` (×2) and `api_crud_v3_nested.py` (×2). **Follow-up (lower priority):** qualify `account_id` in `sql_and_qry_part` to fix the root ambiguity for any future JOIN-capable views. - [x] **Core Isolation:** Harden `apply_forced_account_filter` to Fail-Closed. - [x] **IDAA Baseline:** Remove `public_read` from Event, CMS, and Archive objects. - [x] **Detailed Feedback:** Implement descriptive 403 Forbidden reasons.