fix(crud): strip view-only join columns from order_by_li to prevent ambiguous account_id in WHERE

Sorting by join-derived columns (e.g. event_presenter_family_name on v_event_file)
caused MariaDB to expand the view's JOIN inline, making the unqualified account_id
clause from sql_and_qry_part ambiguous — resulting in a 400 SQL error. filter_order_by
now accepts raw_table_name and validates ORDER BY columns against the physical table
only; join-only columns are silently stripped. Also switches filter_order_by off the
global db connection to engine.connect() context managers. Updated all four call sites
in api_crud_v3.py and api_crud_v3_nested.py.

Docs: add order_by_li raw-table limitation and direct download link patterns to
GUIDE__AE_API_V3_for_Frontend.md; record fix in TODO__Agents.md.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-06-10 13:33:09 -04:00
parent 22e5a3c3fd
commit 2429a1f731
5 changed files with 90 additions and 19 deletions

View File

@@ -140,12 +140,18 @@ def apply_forced_account_filter(and_qry_dict: Optional[Dict], account: AccountCo
return forced return forced
def filter_order_by(order_by_li: Any, model: Any, table_name: str = None) -> Optional[Dict[str, str]]: def filter_order_by(order_by_li: Any, model: Any, table_name: str = None, raw_table_name: str = None) -> Optional[Dict[str, str]]:
""" """
Sanitize Sorting Parameters. Sanitize Sorting Parameters.
Prevents SQL injection and logic errors by validating that requested sort columns Prevents SQL injection and logic errors by validating that requested sort columns
actually exist in the Pydantic model and/or the database table. actually exist in the Pydantic model and/or the database table.
When raw_table_name is provided (the physical table, not the view), columns are
validated against it instead of table_name. This prevents view-only join columns
(e.g. event_presenter_family_name on v_event_file) from reaching ORDER BY —
those columns cause MariaDB to expand the view's JOIN inline, making unqualified
WHERE references like `account_id` ambiguous across joined tables.
""" """
if not order_by_li or not isinstance(order_by_li, dict) or not model: if not order_by_li or not isinstance(order_by_li, dict) or not model:
return order_by_li return order_by_li
@@ -156,14 +162,15 @@ def filter_order_by(order_by_li: Any, model: Any, table_name: str = None) -> Opt
model_fields.update({f.alias for f in model.__fields__.values() if f.alias}) model_fields.update({f.alias for f in model.__fields__.values() if f.alias})
filtered = {k: v for k, v in order_by_li.items() if k in model_fields} filtered = {k: v for k, v in order_by_li.items() if k in model_fields}
if table_name and filtered: check_table = raw_table_name or table_name
from app.db_sql import db if check_table and filtered:
from app import lib_sql_core
from sqlalchemy import text from sqlalchemy import text
final_filtered = {} final_filtered = {}
for column in filtered: for column in filtered:
try: try:
# Lightweight check to see if column exists in SQL with lib_sql_core.engine.connect() as conn:
db.execute(text(f"SELECT `{column}` FROM `{table_name}` LIMIT 0")) conn.execute(text(f"SELECT `{column}` FROM `{check_table}` LIMIT 0"))
final_filtered[column] = filtered[column] final_filtered[column] = filtered[column]
except Exception: except Exception:
pass pass

View File

@@ -226,11 +226,12 @@ async def get_obj_li(
table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl')))
base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl')))
raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl'))
if not table_name or not base_name: if not table_name or not base_name:
return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.")
order_by_li = filter_order_by(order_by_li, base_name, table_name) order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name)
status_filter = get_supported_filters(base_name, status_filter) status_filter = get_supported_filters(base_name, status_filter)
if not obj_cfg.get('public_read', False): if not obj_cfg.get('public_read', False):
@@ -335,11 +336,12 @@ async def search_obj_li(
table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl')))
base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl')))
raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl'))
if not table_name or not base_name: if not table_name or not base_name:
return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.")
order_by_li = filter_order_by(order_by_li, base_name, table_name) order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name)
status_filter = get_supported_filters(base_name, status_filter) status_filter = get_supported_filters(base_name, status_filter)
searchable_fields = obj_cfg.get('searchable_fields') searchable_fields = obj_cfg.get('searchable_fields')

View File

@@ -84,6 +84,7 @@ async def get_child_obj_li(
obj_cfg = obj_type_kv_li[obj_name] obj_cfg = obj_type_kv_li[obj_name]
table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl')))
base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl')))
raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl'))
# Log parent/child resolution details (use INFO so logs appear in production) # Log parent/child resolution details (use INFO so logs appear in production)
log.info("nested.list start parent=%s parent_table=%s parent_id_random=%s child=%s table=%s allowed_parents=%s", parent_obj_type, parent_table, parent_obj_id, obj_name, table_name, obj_cfg.get('parent_types')) log.info("nested.list start parent=%s parent_table=%s parent_id_random=%s child=%s table=%s allowed_parents=%s", parent_obj_type, parent_table, parent_obj_id, obj_name, table_name, obj_cfg.get('parent_types'))
@@ -91,7 +92,7 @@ async def get_child_obj_li(
if not table_name or not base_name: if not table_name or not base_name:
return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.")
order_by_li = filter_order_by(order_by_li, base_name, table_name) order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name)
status_filter = get_supported_filters(base_name, status_filter) status_filter = get_supported_filters(base_name, status_filter)
resolved_parent_id = redis_lookup_id_random(record_id_random=parent_obj_id, table_name=parent_table) resolved_parent_id = redis_lookup_id_random(record_id_random=parent_obj_id, table_name=parent_table)
@@ -187,11 +188,12 @@ async def search_child_obj_li(
obj_cfg = obj_type_kv_li[obj_name] obj_cfg = obj_type_kv_li[obj_name]
table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl'))) table_name = obj_cfg.get(f'tbl_{view}', obj_cfg.get('tbl_default', obj_cfg.get('tbl')))
base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl'))) base_name = obj_cfg.get(f'mdl_{view}', obj_cfg.get('mdl_default', obj_cfg.get('mdl')))
raw_table_name = obj_cfg.get('tbl_update', obj_cfg.get('tbl'))
if not table_name or not base_name: if not table_name or not base_name:
return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.") return mk_resp(data=False, status_code=500, response=response, status_message=f"Configuration error.")
order_by_li = filter_order_by(order_by_li, base_name, table_name) order_by_li = filter_order_by(order_by_li, base_name, table_name, raw_table_name=raw_table_name)
status_filter = get_supported_filters(base_name, status_filter) status_filter = get_supported_filters(base_name, status_filter)
searchable_fields = obj_cfg.get('searchable_fields') searchable_fields = obj_cfg.get('searchable_fields')

View File

@@ -96,6 +96,26 @@ The primary way to retrieve data.
* **Endpoint:** `POST /v3/crud/{obj_type}/search` * **Endpoint:** `POST /v3/crud/{obj_type}/search`
* **Security:** Automatically filters results to only show records belonging to your `x-account-id`. If no account context is provided, it will return **0 records** for private objects. * **Security:** Automatically filters results to only show records belonging to your `x-account-id`. If no account context is provided, it will return **0 records** for private objects.
#### Sorting with `order_by_li`
Pass a JSON object as the `order_by_li` query parameter to sort results:
```ts
// ?order_by_li={"filename":"ASC","created_on":"DESC"}
const params = new URLSearchParams({
order_by_li: JSON.stringify({ filename: 'ASC', created_on: 'DESC' })
});
```
> [!IMPORTANT]
> **`order_by_li` only accepts columns from the raw base table** — not view-only join columns.
>
> Some object types (e.g. `event_file`) have enriched views that JOIN other tables to expose convenience fields like `event_presenter_family_name`. These are available in search results when using `?view=alt`, but they **cannot** be used in `order_by_li`. Attempting to sort by them silently drops those sort keys (the query proceeds without them).
>
> If you need to sort by a joined field, sort client-side on the returned list.
>
> **Columns safe to sort on for `event_file`:** any field in the `event_file` table itself — `filename`, `title`, `extension`, `created_on`, `updated_on`, `sort`, `enable`, etc.
### C. POST Create / PATCH Update ### C. POST Create / PATCH Update
Modify data in the system. Modify data in the system.
* **Endpoints:** * **Endpoints:**
@@ -281,6 +301,47 @@ Every Event File (`event_file`) **must** have a linked Hosted File (`hosted_file
* `hosted_file_size` (string - in bytes) * `hosted_file_size` (string - in bytes)
2. **Nested Hosted File Object:** A full `hosted_file` object will be nested under the `hosted_file` key. This object (`Hosted_File_Base` model) will contain all its standard fields, including `id` (random string ID), `hash_sha256`, `content_type`, `size`, etc. 2. **Nested Hosted File Object:** A full `hosted_file` object will be nested under the `hosted_file` key. This object (`Hosted_File_Base` model) will contain all its standard fields, including `id` (random string ID), `hash_sha256`, `content_type`, `size`, etc.
### Direct Download Links (Shareable / External)
Event files can be downloaded without standard auth headers using one of two bypass mechanisms. This is useful for generating shareable links for staff or external recipients.
- **Method:** `GET`
- **Path:** `/v3/action/event_file/{event_file_id}/download`
The endpoint also accepts `hosted_file_id` or `archive_content_id` directly — it resolves the chain automatically.
#### Auth bypass options
| Query param | Value | When to use |
|---|---|---|
| `?key=<account_id_random>` | Any valid account random ID | Staff sharing within a known account context |
| `?site_key=<site_access_key>` | The site's `access_key` value | Public or semi-public distribution tied to a specific site |
Either param replaces the need for `x-aether-api-key` / `x-account-id` headers, so the URL is self-contained and works in a plain browser tab or `<a href>` link.
#### Optional params
| Query param | Description |
|---|---|
| `filename` | Override the download filename (min 4 chars). Useful for giving files clean display names. |
#### Building a shareable link
```ts
// Build a self-contained download URL for staff/external use
function makeDownloadUrl(eventFileId: string, accountId: string, displayName?: string): string {
const base = `https://dev-api.oneskyit.com/v3/action/event_file/${eventFileId}/download`;
const params = new URLSearchParams({ key: accountId });
if (displayName) params.set('filename', displayName);
return `${base}?${params}`;
}
```
The endpoint supports byte-range requests (`Range` header), so it works correctly for in-browser media streaming as well as direct file downloads.
> [!NOTE]
> The `?key=` bypass verifies only that the account ID exists — it does not confirm the file belongs to that account. It is appropriate for internal staff tools. For publicly distributed links, prefer `?site_key=` which ties access to a specific site's configured key.
--- ---
## 6. Hosted File Actions: Convert & Clip (Frontend Notes) ## 6. Hosted File Actions: Convert & Clip (Frontend Notes)
@@ -301,18 +362,16 @@ These helper endpoints let the frontend request small server-side transformation
- Required query params: `link_to_type`, `link_to_id`, `start_time`, `end_time` (format `HH:MM:SS`) - Required query params: `link_to_type`, `link_to_id`, `start_time`, `end_time` (format `HH:MM:SS`)
- Optional query params: `filename_no_ext` (defaults to `automated_hosted_file_clip_video`), `reencode` (bool), `scale_down` (bool) - Optional query params: `filename_no_ext` (defaults to `automated_hosted_file_clip_video`), `reencode` (bool), `scale_down` (bool)
- Auth: standard V3 headers - Auth: standard V3 headers
- Behavior: extracts a clip using `ffmpeg` and saves it as a new `hosted_file`. Defaults to stream-copying to be fast; set `reencode=true` to force H.264 or `scale_down=true` to resize. Returns 400 on failure. - Behavior: extracts a clip using `ffmpeg` and saves it as a new `hosted_file`.
- Behavior: extracts a clip using `ffmpeg` and saves it as a new `hosted_file`. - Defaults to stream-copying (fast); set `reencode=true` to force H.264 or `scale_down=true` to resize.
- Defaults to stream-copying to be fast; set `reencode=true` to force H.264 or `scale_down=true` to resize. - Add `?background=true` to schedule the clip asynchronously — returns `202 Accepted` immediately; poll the `hosted_file` record for completion.
- For longer-running clips you can schedule the job in the background by adding `?background=true`. When scheduled the API returns `202 Accepted` and the clip runs asynchronously on the server; check the returned `hosted_file` record later via the standard V3 `hosted_file` endpoints. - Returns 400 on synchronous failure; 202 when scheduled successfully.
- Returns 400 on synchronous failure; returns 202 when scheduled successfully.
Frontend guidance: Frontend guidance:
- Call these routes with the same `link_to_type` / `link_to_id` you plan to associate the resulting hosted_file with — the server resolves random IDs for you. - Call these routes with the same `link_to_type` / `link_to_id` you plan to associate the resulting hosted_file with — the server resolves random IDs for you.
- After a successful response, use the V3 `hosted_file` action endpoints (download/delete) to manage or retrieve the new file. - After a successful response, use the V3 `hosted_file` action endpoints (download/delete) to manage or retrieve the new file.
- These endpoints run synchronously and can take time for large inputs; for heavy or batch workloads use a queued job pattern instead. - Prefer `?background=true` for large inputs to avoid request timeouts. For heavy or batch workloads use a queued job pattern instead.
- These endpoints may take time for large inputs. Prefer using `?background=true` to schedule work and receive a `202 Accepted` response for async processing. For heavy or batch workloads use a queued job pattern instead.
--- ---

View File

@@ -23,6 +23,7 @@
- [x] **[P4] Expose `pool_size` / `max_overflow` as env vars** — `create_ae_engine()` calls `settings.DB.get('pool_size', 10)` but `settings.DB` property doesn't include those keys, so they're always hardcoded 10/20. Add `AE_DB_POOL_SIZE` / `AE_DB_POOL_MAX_OVERFLOW` to `config.py`. - [x] **[P4] Expose `pool_size` / `max_overflow` as env vars** — `create_ae_engine()` calls `settings.DB.get('pool_size', 10)` but `settings.DB` property doesn't include those keys, so they're always hardcoded 10/20. Add `AE_DB_POOL_SIZE` / `AE_DB_POOL_MAX_OVERFLOW` to `config.py`.
## 📋 Feature Tasks ## 📋 Feature Tasks
- [x] **`order_by_li` view-join ambiguity fix (June 2026):** Using view-only join columns (e.g. `event_presenter_family_name` from `v_event_file`) in `order_by_li` caused MariaDB error "Unknown column 'account_id' in WHERE" (HTTP 400). Root cause: `filter_order_by` validated columns against the view — which passes for join-derived fields — and `sql_and_qry_part` generates an unqualified `account_id =` clause that becomes ambiguous when MariaDB expands the view's JOIN inline. Fix: `filter_order_by` now accepts `raw_table_name` and validates ORDER BY columns against the physical table only. Join-only view columns are silently stripped. Updated all three call sites in `api_crud_v3.py` (×2) and `api_crud_v3_nested.py` (×2). **Follow-up (lower priority):** qualify `account_id` in `sql_and_qry_part` to fix the root ambiguity for any future JOIN-capable views.
- [x] **Core Isolation:** Harden `apply_forced_account_filter` to Fail-Closed. - [x] **Core Isolation:** Harden `apply_forced_account_filter` to Fail-Closed.
- [x] **IDAA Baseline:** Remove `public_read` from Event, CMS, and Archive objects. - [x] **IDAA Baseline:** Remove `public_read` from Event, CMS, and Archive objects.
- [x] **Detailed Feedback:** Implement descriptive 403 Forbidden reasons. - [x] **Detailed Feedback:** Implement descriptive 403 Forbidden reasons.