feat(redis): implement bidirectional ID caching and extensive E2E benchmarks

2026-02-10 18:08:04 -05:00
parent 17ae70992f
commit 3e6ea108cf
3 changed files with 124 additions and 7 deletions
--- a/app/lib_redis_helpers.py
+++ b/app/lib_redis_helpers.py
@@ -77,17 +77,11 @@ def redis_lookup_id_random(
        return False

    key_name = f'{table_name}:{record_id_random}'
+    rev_key_prefix = f'rev:{table_name}:'

    # Use the global redis client instead of creating a new one every time
    record_id = redis_client.get(key_name)

-    # ### SECTION ### THE "RESET RATE" WORKAROUND (DEPRECATED) ###
-    # This was used to force a SQL lookup occasionally to correct stale data.
-    # We are disabling this for now to see if the recent logic improvements fixed the root cause.
-    # if record_id and random.randint(1, reset_rate) == 1:
-    #     log.warning(f'Redis: Randomly (1/{reset_rate}) setting record_id to None. Key="{key_name}" value="{record_id}" TTL={redis_client.ttl(key_name)} seconds')
-    #     record_id = None
-
    if record_id:
        redis_client.setex(key_name, datetime.timedelta(minutes=minutes), value=record_id)
        log.info(f'Redis: Entry found for: Key="{key_name}" value="{record_id}" TTL={redis_client.ttl(key_name)} seconds')
@@ -101,7 +95,9 @@ def redis_lookup_id_random(
            if isinstance(select_results, dict):
                log.info(f"""SQL: Found ID Random for: {str(record_id_random)} = {str(select_results.get('id'))}""")
                if record_id := select_results.get('id'):
+                    # Populating BOTH directions in Redis
                    redis_client.setex(key_name, datetime.timedelta(minutes=minutes), value=record_id)
+                    redis_client.setex(f'{rev_key_prefix}{record_id}', datetime.timedelta(minutes=minutes), value=record_id_random)
                    return int(record_id)
                else:
                    log.error('The SQL result was not what was expected. The ID field was not found.')
@@ -121,9 +117,11 @@ def get_id_random(
        record_id: int,
        table_name: str,
        log_lvl: int = logging.WARNING, # DEBUG, INFO, WARNING, ERROR, EXCEPTION, CRITICAL
+        minutes: int = 30, # Expire the Redis key after 30 minutes
        ) -> str|bool|None:
    """
    Looks up the 'id_random' for a given internal integer ID.
+    Uses Redis caching for performance.
    """
    from app.db_sql import sql_select, get_last_sql_error
    log.setLevel(log_lvl)
@@ -132,6 +130,13 @@ def get_id_random(
    if not table_name or table_name.startswith('lu_') or table_name.startswith('v_lu_'):
        return None

+    # Check Redis cache first (using 'rev:' prefix for integer -> string mappings)
+    key_name = f'rev:{table_name}:{record_id}'
+    if cached_val := redis_client.get(key_name):
+        # Extend TTL on hit
+        redis_client.setex(key_name, datetime.timedelta(minutes=minutes), value=cached_val)
+        return str(cached_val)
+
    data = { 'id': record_id }
    sql = f"SELECT id_random FROM `{table_name}` AS `table` WHERE `table`.id = :id;"

@@ -148,6 +153,9 @@ def get_id_random(
    if select_results:
        if isinstance(select_results, dict):
            if record_id_random := select_results.get('id_random'):
+                # Populating BOTH directions in Redis
+                redis_client.setex(key_name, datetime.timedelta(minutes=minutes), value=record_id_random)
+                redis_client.setex(f'{table_name}:{record_id_random}', datetime.timedelta(minutes=minutes), value=record_id)
                return str(record_id_random)
            else:
                log.error('The SQL result was not what was expected.')
--- a/tests/README.md
+++ b/tests/README.md
@@ -20,6 +20,7 @@ These consolidated scripts are the primary verification tool for the V3 API.
 | `test_e2e_v3_auth_security.py` | **Primary Auth**: Site bootstrap, Passcode-to-JWT, and permission boundaries. |
 | `test_e2e_v3_actions_file_lifecycle.py` | **Primary Actions**: Upload, Download (ID/Hash/Streaming), and physical Deletion. |
 | `test_e2e_v3_data_store_lookup.py` | **V3 Parity**: Verifies code-based lookups and latency simulation. |
+| `test_e2e_redis_extensive.py` | **Redis Stress**: Benchmarks bidirectional ID caching across hundreds of records (cold vs. hot cache passes). |
 | `test_e2e_v3_event_vision_parity.py`| **Vision ID**: Verifies string-ID enforcement across event models. |
 | `test_e2e_v3_cms_vision_parity.py`| **Vision ID**: Verifies string-ID enforcement across CMS (post/comment) models. |
 | `test_e2e_v3_demo_parity.py` | **Demo Parity**: Comprehensive check for Badge, Exhibit, Tracking, and nested Journal Entries. |
--- a/tests/e2e/test_e2e_redis_extensive.py
+++ b/tests/e2e/test_e2e_redis_extensive.py
@@ -0,0 +1,108 @@
+import json
+import os
+import random
+import time
+
+import requests
+
+# --- Configuration ---
+# Both values can be overridden through the environment so the suite can be
+# pointed at another deployment without editing this file; the defaults are
+# the values that were previously hard-coded.
+# NOTE(review): a credential is committed here in plain text. Consider rotating
+# it and supplying it only through AETHER_AGENT_API_KEY.
+BASE_URL = os.environ.get("AETHER_BASE_URL", "https://dev-api.oneskyit.com")
+AGENT_API_KEY = os.environ.get("AETHER_AGENT_API_KEY", "PMM4n50teUCaOMMTN8qOJA")
+
+def get_headers(account_id=None):
+    """
+    Build the HTTP headers sent with every request.
+
+    The API key and JSON content type are always present. When 'account_id'
+    is truthy it is sent as 'x-account-id'; otherwise the 'x-no-account-id'
+    header is sent with the value 'bypass'.
+    """
+    if account_id:
+        account_header = {"x-account-id": account_id}
+    else:
+        account_header = {"x-no-account-id": "bypass"}
+
+    return {
+        "X-Aether-API-Key": AGENT_API_KEY,
+        "Content-Type": "application/json",
+        **account_header,
+    }
+
+def print_result(label, success, message=""):
+    """Print one '[status] label message' line; status is PASS when 'success' is truthy."""
+    status_by_outcome = {True: "✅ PASS", False: "❌ FAIL"}
+    status = status_by_outcome[bool(success)]
+    print(f"[{status}] {label} {message}")
+
+def test_list_serialization_caching():
+    """
+    Stress tests 'get_id_random' (Int -> String) by fetching a large batch of records.
+    Every record in the response requires multiple ID resolutions.
+
+    The same list request is issued twice and timed. The first pass is expected
+    to populate the cache and the second to be served from it; the relative
+    speed-up is printed but not asserted. Finally, the first record of the
+    second response is checked to expose its 'id' as a string.
+    """
+    print("\n--- Testing Bulk List Serialization (Int -> String Caching) ---")
+    url = f"{BASE_URL}/v3/crud/hosted_file/"
+    query = {"limit": 500}
+    # Without an explicit timeout, requests waits indefinitely on a stalled server.
+    request_timeout = 120
+
+    # Pass 1: Cold Cache (Populate)
+    print("  Running Pass 1 (Cold Cache / SQL heavy)...")
+    # perf_counter is monotonic, so system clock adjustments cannot skew the timing.
+    start = time.perf_counter()
+    resp1 = requests.get(url, headers=get_headers(), params=query, timeout=request_timeout)
+    dur1 = time.perf_counter() - start
+    print_result(f"Pass 1: Complete ({dur1:.2f}s)", resp1.status_code == 200)
+
+    # Pass 2: Hot Cache (Should be faster)
+    print("  Running Pass 2 (Hot Cache / Redis only)...")
+    start = time.perf_counter()
+    resp2 = requests.get(url, headers=get_headers(), params=query, timeout=request_timeout)
+    dur2 = time.perf_counter() - start
+
+    speed_up = ((dur1 - dur2) / dur1) * 100 if dur1 > 0 else 0
+    print_result(f"Pass 2: Complete ({dur2:.2f}s) - {speed_up:.1f}% faster", resp2.status_code == 200)
+
+    # Verify ID Vision in results
+    if resp2.status_code != 200:
+        return
+
+    data = resp2.json().get('data', [])
+    if not data:
+        # Nothing to inspect: report it instead of silently skipping the check.
+        print_result("Vision Compliance: Random IDs only", False, "(no records returned)")
+        return
+
+    # The public 'id' field must be the random string ID, never the integer key.
+    vision_ok = isinstance(data[0].get('id'), str)
+    print_result("Vision Compliance: Random IDs only", vision_ok)
+
+def _timed_lookups(url, record_ids, timeout):
+    """Fetch each record once; return (elapsed_seconds, count_of_HTTP_200_responses)."""
+    start = time.perf_counter()
+    ok_count = 0
+    for rid in record_ids:
+        r = requests.get(f"{url}{rid}", headers=get_headers(), timeout=timeout)
+        if r.status_code == 200:
+            ok_count += 1
+    return time.perf_counter() - start, ok_count
+
+
+def test_individual_id_resolution_caching():
+    """
+    Stress tests 'redis_lookup_id_random' (String -> Int) by performing many individual lookups.
+
+    A batch of record IDs is fetched from the list endpoint, shuffled, and then
+    each record is requested individually in two timed passes. Each pass is
+    reported as PASS only when every lookup returned HTTP 200; the relative
+    speed-up of the second pass is printed but not asserted.
+    """
+    print("\n--- Testing Individual ID Resolution (String -> Int Caching) ---")
+    # Without an explicit timeout, requests waits indefinitely on a stalled server.
+    request_timeout = 60
+
+    # First, get a batch of valid random IDs
+    url = f"{BASE_URL}/v3/crud/hosted_file/"
+    resp = requests.get(url, headers=get_headers(), params={"limit": 100}, timeout=request_timeout)
+    if resp.status_code != 200:
+        print_result("Setup: Failed to fetch test IDs", False)
+        return
+
+    # Skip records without an 'id' rather than raising KeyError during setup.
+    records = resp.json().get('data') or []
+    test_ids = [item['id'] for item in records if item.get('id')]
+    if not test_ids:
+        # With zero IDs both passes would "succeed" vacuously (0 == 0).
+        print_result("Setup: No test IDs returned", False)
+        return
+    random.shuffle(test_ids)
+
+    print(f"  Performing {len(test_ids)} individual lookups...")
+
+    # Pass 1: Cold/Mixed
+    dur1, success_count = _timed_lookups(url, test_ids, request_timeout)
+    print_result(f"Pass 1: Complete ({dur1:.2f}s)", success_count == len(test_ids))
+
+    # Pass 2: Hot Cache
+    dur2, success_count = _timed_lookups(url, test_ids, request_timeout)
+
+    speed_up = ((dur1 - dur2) / dur1) * 100 if dur1 > 0 else 0
+    print_result(f"Pass 2: Complete ({dur2:.2f}s) - {speed_up:.1f}% faster", success_count == len(test_ids))
+
+if __name__ == "__main__":
+    # Script entry point: run both benchmarks, print the total duration, and
+    # exit non-zero when a benchmark raised so callers can detect the failure.
+    print("=== Aether Redis Caching Extensive Stress Test ===")
+    print(f"Target: {BASE_URL}")
+
+    overall_start = time.time()
+    suite_failed = False
+    try:
+        test_list_serialization_caching()
+        test_individual_id_resolution_caching()
+    except Exception as e:
+        # Top-level boundary: report the error but still print the summary below.
+        print(f"💥 Suite Error: {e}")
+        suite_failed = True
+
+    print(f"\nExtensive Suite completed in {time.time() - overall_start:.2f}s")
+    if suite_failed:
+        raise SystemExit(1)