revert(lookup): restore PARTITION BY group; tests now track data fix

Reverts the PARTITION BY name change — group is the correct dedup key. Partitioning by name broke country deduplication: two records with alpha_2_code='US' both survived, causing a Svelte each_key_duplicate error.

The root cause of the collapsed timezone entries is bad seed data in lu_v3_time_zone: group='United States' for 13 US/* zones and group='Europe' for 63 Europe/* zones, instead of group=name. A separate DB UPDATE is required to fix those rows.

Tests updated to assert:
- No duplicate alpha_2_code in the country list (PARTITION BY group regression)
- All 13 US/* zones and the 4 Europe/* spot-check zones are present (pending DB data fix)
- priority-only timezone count == 72 (pending DB data fix)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 17:31:30 -04:00
parent ccf2f30e11
commit c9ec3d7ea1
2 changed files with 96 additions and 33 deletions
--- a/app/methods/lookup_methods.py
+++ b/app/methods/lookup_methods.py
@@ -28,7 +28,7 @@ def get_lookup_list_v3(
        SELECT * FROM (
            SELECT *, 
                   ROW_NUMBER() OVER (
-                       PARTITION BY `name`
+                       PARTITION BY `group`
                       ORDER BY
                          (for_type = :for_type AND for_id = :for_id) DESC,
                          (account_id = :account_id) DESC, 
--- a/tests/e2e/test_e2e_v3_lookup.py
+++ b/tests/e2e/test_e2e_v3_lookup.py
@@ -14,7 +14,20 @@ HEADERS = {
 }

 # TODO: SET THIS to your demo site's random ID
-SITE_ID_RANDOM = "92vkYC4fVEl" 
+SITE_ID_RANDOM = "92vkYC4fVEl"
+
+# All US/* priority timezones — group must equal name in lu_v3_time_zone for these to survive
+# PARTITION BY group dedup. If group="United States" for these, only 1 survives.
+US_TIMEZONES = [
+    "US/Alaska", "US/Aleutian", "US/Arizona", "US/Central", "US/East-Indiana",
+    "US/Eastern", "US/Hawaii", "US/Indiana-Starke", "US/Michigan",
+    "US/Mountain", "US/Pacific", "US/Pacific-New", "US/Samoa",
+]
+
+# Spot-check a subset of Europe/* priority timezones — same root cause as US/*
+EUROPE_TIMEZONES_SAMPLE = [
+    "Europe/London", "Europe/Paris", "Europe/Prague", "Europe/Rome",
+]

 def print_result(label, success, message=""):
    status = "✅ PASS" if success else "❌ FAIL"
@@ -30,17 +43,17 @@ def test_lookup_list(lu_type, site_id=None, only_priority=False):
    if only_priority:
        params["only_priority"] = "true"
        label += " (Priority Only)"
-        
+
    try:
        start_time = time.time()
        response = requests.get(url, headers=HEADERS, params=params)
        duration = time.time() - start_time
-        
+
        if response.status_code == 200:
            data = response.json().get('data', [])
            msg = f"Found {len(data)} items ({duration:.2f}s)"
            print_result(label, True, msg)
-            
+
            # Print top 10 for sorting verification
            if data and not site_id: # Only print for full or priority lists
                limit = 10 if not only_priority else len(data)
@@ -49,7 +62,7 @@ def test_lookup_list(lu_type, site_id=None, only_priority=False):
                    prio = item.get('priority', 0)
                    sort = item.get('sort', 0)
                    print(f"   [{i+1}] {item.get('name')} (Prio: {prio}, Sort: {sort})")
-            
+
            return data
        else:
            print_result(label, False, f"Status {response.status_code}: {response.text[:100]}")
@@ -75,49 +88,99 @@ def test_lookup_resolve(lu_type, query):
        print_result(f"GET /{lu_type}/resolve?q={query}", False, str(e))
        return False

-US_TIMEZONES = [
-    "US/Alaska", "US/Arizona", "US/Central", "US/East-Indiana",
-    "US/Eastern", "US/Hawaii", "US/Indiana-Starke", "US/Michigan",
-    "US/Mountain", "US/Pacific", "US/Pacific-New", "US/Samoa",
-]
-
-def test_timezone_us_dedup():
-    """Regression: PARTITION BY `name` fix — all 12 US/* zones must appear."""
-    label = "time_zone US/* deduplication (regression)"
-    data = test_lookup_list("time_zone")
+def test_timezone_us_dedup(data):
+    """
+    Regression: lu_v3_time_zone group data fix.
+    All 13 US/* priority zones must appear individually.
+    Root cause: group was seeded as 'United States' instead of name — PARTITION BY group
+    collapsed all 13 into one winner.
+    """
+    label = "time_zone: all 13 US/* zones present (group=name data fix)"
    if data is None:
+        print_result(label, False, "No data")
        return
    names = {item.get("name") for item in data}
    missing = [tz for tz in US_TIMEZONES if tz not in names]
    if missing:
-        print_result(label, False, f"Missing: {missing}")
+        print_result(label, False, f"Missing (group data not yet fixed?): {missing}")
    else:
-        print_result(label, True, f"All 12 US/* timezones present ({len(data)} total)")
+        print_result(label, True, f"All {len(US_TIMEZONES)} US/* timezones present")
+
+def test_timezone_europe_dedup(data):
+    """
+    Regression: same root cause as US/* — group was 'Europe' for all Europe/* zones.
+    Spot-check that the priority ones appear individually after data fix.
+    """
+    label = "time_zone: Europe/* spot-check (group=name data fix)"
+    if data is None:
+        print_result(label, False, "No data")
+        return
+    names = {item.get("name") for item in data}
+    missing = [tz for tz in EUROPE_TIMEZONES_SAMPLE if tz not in names]
+    if missing:
+        print_result(label, False, f"Missing (group data not yet fixed?): {missing}")
+    else:
+        print_result(label, True, f"Europe/* spot-check passed ({len(EUROPE_TIMEZONES_SAMPLE)} zones found)")
+
+def test_country_us_dedup(data):
+    """
+    Regression: PARTITION BY group must NOT produce duplicate alpha_2_code values.
+    Two records exist for alpha_2_code='US' (global default + account override) — only one
+    should survive. If PARTITION BY name were used, both would appear and Svelte would
+    throw each_key_duplicate on alpha_2_code='US'.
+    """
+    label = "country: no duplicate alpha_2_code (PARTITION BY group dedup)"
+    if data is None:
+        print_result(label, False, "No data")
+        return
+    codes = [item.get("alpha_2_code") for item in data if item.get("alpha_2_code")]
+    duplicates = [c for c in set(codes) if codes.count(c) > 1]
+    if duplicates:
+        print_result(label, False, f"Duplicate alpha_2_codes: {duplicates}")
+    else:
+        print_result(label, True, f"No duplicates across {len(data)} countries")
+
+def test_priority_only_count(data, expected=72):
+    """priority=1 enabled timezones: should be exactly {expected} after data fix."""
+    label = f"time_zone priority-only count == {expected}"
+    if data is None:
+        print_result(label, False, "No data")
+        return
+    if len(data) == expected:
+        print_result(label, True, f"{len(data)} records")
+    else:
+        print_result(label, False, f"Got {len(data)}, expected {expected} (data fix pending?)")

 if __name__ == "__main__":
    print(f"🚀 Starting V3 Lookup E2E Suite ({BASE_URL})\n")
    start_suite = time.time()

-    # 1. Basic Lists (Phase 1)
-    test_lookup_list("country")
+    # 1. Country — basic list + dedup regression
+    print("--- Country ---")
+    country_data = test_lookup_list("country")
+    test_country_us_dedup(country_data)

-    print("\n--- Regression: US/* timezone deduplication ---")
-    test_timezone_us_dedup()
+    # 2. Timezone — full list + group data fix regressions
+    print("\n--- Timezone (full list) ---")
+    tz_data = test_lookup_list("time_zone")
+    test_timezone_us_dedup(tz_data)
+    test_timezone_europe_dedup(tz_data)

-    print("\n--- Testing Priority Only ---")
-    test_lookup_list("time_zone", only_priority=True)
-    
-    # 2. Whitelist Test (Phase 2)
+    # 3. Timezone — priority only
+    print("\n--- Timezone (priority only) ---")
+    tz_priority_data = test_lookup_list("time_zone", only_priority=True)
+    test_priority_only_count(tz_priority_data, expected=72)
+
+    # 4. Whitelist Test
    if SITE_ID_RANDOM != "SET_ME_TO_SITE_ID":
-        print("\n--- Testing Site Whitelist Policy ---")
-        # Should return only whitelisted items
+        print("\n--- Site Whitelist Policy ---")
        test_lookup_list("country", site_id=SITE_ID_RANDOM)
        test_lookup_list("time_zone", site_id=SITE_ID_RANDOM)
    else:
-        print("\n⚠️ Skipping Phase 2 test: SITE_ID_RANDOM not set.")
-    
-    # 3. Resolve Test
-    print("\n--- Testing Resolve ---")
+        print("\n⚠️ Skipping whitelist test: SITE_ID_RANDOM not set.")
+
+    # 5. Resolve
+    print("\n--- Resolve ---")
    test_lookup_resolve("country", "US")
-    
+
    print(f"\n⏱️ Suite completed in {time.time() - start_suite:.2f}s")