revert(lookup): restore PARTITION BY group; tests now track data fix

Reverts the PARTITION BY name change — group is the correct dedup key.
Partitioning by name broke country deduplication (two US records both
survived, causing Svelte each_key_duplicate on alpha_2_code='US').

Root cause is bad seed data in lu_v3_time_zone: group='United States'
for 13 US/* zones and group='Europe' for 63 Europe/* zones instead of
group=name. A separate DB UPDATE is required to fix those rows.

Tests updated to assert:
- No duplicate alpha_2_code in country list (PARTITION BY group regression)
- All 13 US/* zones and the Europe/* spot-check zones present (pending DB data fix)
- priority-only timezone count == 72 (pending DB data fix)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Idem
2026-03-23 17:31:30 -04:00
parent ccf2f30e11
commit c9ec3d7ea1
2 changed files with 96 additions and 33 deletions

View File

@@ -28,7 +28,7 @@ def get_lookup_list_v3(
SELECT * FROM (
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY `name`
PARTITION BY `group`
ORDER BY
(for_type = :for_type AND for_id = :for_id) DESC,
(account_id = :account_id) DESC,

View File

@@ -14,7 +14,20 @@ HEADERS = {
}
# TODO: SET THIS to your demo site's random ID
SITE_ID_RANDOM = "92vkYC4fVEl"
SITE_ID_RANDOM = "92vkYC4fVEl"
# Every US/* priority timezone. For each of these to survive the PARTITION BY group
# dedup, its row in lu_v3_time_zone needs group == name; when group="United States"
# is shared by all of them, only one survives.
US_TIMEZONES = [
    "US/Alaska",
    "US/Aleutian",
    "US/Arizona",
    "US/Central",
    "US/East-Indiana",
    "US/Eastern",
    "US/Hawaii",
    "US/Indiana-Starke",
    "US/Michigan",
    "US/Mountain",
    "US/Pacific",
    "US/Pacific-New",
    "US/Samoa",
]

# A small sample of Europe/* priority timezones used as a spot-check; these are
# affected by the same root cause as the US/* zones.
EUROPE_TIMEZONES_SAMPLE = [
    "Europe/London",
    "Europe/Paris",
    "Europe/Prague",
    "Europe/Rome",
]
def print_result(label, success, message=""):
status = "✅ PASS" if success else "❌ FAIL"
@@ -30,17 +43,17 @@ def test_lookup_list(lu_type, site_id=None, only_priority=False):
if only_priority:
params["only_priority"] = "true"
label += " (Priority Only)"
try:
start_time = time.time()
response = requests.get(url, headers=HEADERS, params=params)
duration = time.time() - start_time
if response.status_code == 200:
data = response.json().get('data', [])
msg = f"Found {len(data)} items ({duration:.2f}s)"
print_result(label, True, msg)
# Print top 10 for sorting verification
if data and not site_id: # Only print for full or priority lists
limit = 10 if not only_priority else len(data)
@@ -49,7 +62,7 @@ def test_lookup_list(lu_type, site_id=None, only_priority=False):
prio = item.get('priority', 0)
sort = item.get('sort', 0)
print(f" [{i+1}] {item.get('name')} (Prio: {prio}, Sort: {sort})")
return data
else:
print_result(label, False, f"Status {response.status_code}: {response.text[:100]}")
@@ -75,49 +88,99 @@ def test_lookup_resolve(lu_type, query):
print_result(f"GET /{lu_type}/resolve?q={query}", False, str(e))
return False
US_TIMEZONES = [
"US/Alaska", "US/Arizona", "US/Central", "US/East-Indiana",
"US/Eastern", "US/Hawaii", "US/Indiana-Starke", "US/Michigan",
"US/Mountain", "US/Pacific", "US/Pacific-New", "US/Samoa",
]
def test_timezone_us_dedup():
"""Regression: PARTITION BY `name` fix — all 12 US/* zones must appear."""
label = "time_zone US/* deduplication (regression)"
data = test_lookup_list("time_zone")
def test_timezone_us_dedup(data):
"""
Regression: lu_v3_time_zone group data fix.
All 13 US/* priority zones must appear individually.
Root cause: group was seeded as 'United States' instead of name — PARTITION BY group
collapsed all 13 into one winner.
"""
label = "time_zone: all 13 US/* zones present (group=name data fix)"
if data is None:
print_result(label, False, "No data")
return
names = {item.get("name") for item in data}
missing = [tz for tz in US_TIMEZONES if tz not in names]
if missing:
print_result(label, False, f"Missing: {missing}")
print_result(label, False, f"Missing (group data not yet fixed?): {missing}")
else:
print_result(label, True, f"All 12 US/* timezones present ({len(data)} total)")
print_result(label, True, f"All {len(US_TIMEZONES)} US/* timezones present")
def test_timezone_europe_dedup(data):
    """Spot-check that every sampled Europe/* timezone is present in ``data``.

    Regression check for the same root cause as the US/* zones: the seed data
    used group='Europe' for all Europe/* rows, so they only show up individually
    once the group data has been fixed.

    Args:
        data: Lookup items (each exposing ``.get("name")``), or ``None`` when the
            list could not be fetched.

    Returns:
        None. The outcome is reported through ``print_result``.
    """
    label = "time_zone: Europe/* spot-check (group=name data fix)"

    # Nothing to inspect: report the failure and stop early.
    if data is None:
        print_result(label, False, "No data")
        return

    # Collect the names that came back, then find which sampled zones are absent.
    present = set()
    for row in data:
        present.add(row.get("name"))
    absent = [zone for zone in EUROPE_TIMEZONES_SAMPLE if zone not in present]

    if not absent:
        print_result(label, True, f"Europe/* spot-check passed ({len(EUROPE_TIMEZONES_SAMPLE)} zones found)")
        return
    print_result(label, False, f"Missing (group data not yet fixed?): {absent}")
def test_country_us_dedup(data):
    """Verify the country list contains no repeated ``alpha_2_code`` values.

    Regression check for the PARTITION BY group dedup. Background carried over
    from the original change: two records exist for alpha_2_code='US' (a global
    default and an account override) and only one should survive. With
    PARTITION BY name both would appear and Svelte would throw
    each_key_duplicate on alpha_2_code='US'.

    Args:
        data: Country lookup items (each exposing ``.get("alpha_2_code")``), or
            ``None`` when the list could not be fetched.

    Returns:
        None. The outcome is reported through ``print_result``.
    """
    # Local import keeps this block self-contained regardless of the file's
    # top-level imports.
    from collections import Counter

    label = "country: no duplicate alpha_2_code (PARTITION BY group dedup)"
    if data is None:
        print_result(label, False, "No data")
        return

    # Items with a missing or empty code are ignored, as before.
    codes = [item.get("alpha_2_code") for item in data if item.get("alpha_2_code")]

    # Single counting pass instead of list.count() per distinct code (O(n^2)).
    # Counter keeps first-seen order, so the reported list is deterministic.
    duplicates = [code for code, seen in Counter(codes).items() if seen > 1]

    if duplicates:
        print_result(label, False, f"Duplicate alpha_2_codes: {duplicates}")
    else:
        print_result(label, True, f"No duplicates across {len(data)} countries")
def test_priority_only_count(data, expected=72):
    """Check that the priority-only timezone list has exactly ``expected`` items.

    The default of 72 is the count expected once the pending data fix has been
    applied.

    Args:
        data: Priority-only timezone items, or ``None`` when the list could not
            be fetched.
        expected: Required number of items.

    Returns:
        None. The outcome is reported through ``print_result``.
    """
    label = f"time_zone priority-only count == {expected}"

    # Nothing to count: report the failure and stop early.
    if data is None:
        print_result(label, False, "No data")
        return

    count = len(data)
    if count != expected:
        print_result(label, False, f"Got {count}, expected {expected} (data fix pending?)")
        return
    print_result(label, True, f"{count} records")
if __name__ == "__main__":
print(f"🚀 Starting V3 Lookup E2E Suite ({BASE_URL})\n")
start_suite = time.time()
# 1. Basic Lists (Phase 1)
test_lookup_list("country")
# 1. Country — basic list + dedup regression
print("--- Country ---")
country_data = test_lookup_list("country")
test_country_us_dedup(country_data)
print("\n--- Regression: US/* timezone deduplication ---")
test_timezone_us_dedup()
# 2. Timezone — full list + group data fix regressions
print("\n--- Timezone (full list) ---")
tz_data = test_lookup_list("time_zone")
test_timezone_us_dedup(tz_data)
test_timezone_europe_dedup(tz_data)
print("\n--- Testing Priority Only ---")
test_lookup_list("time_zone", only_priority=True)
# 2. Whitelist Test (Phase 2)
# 3. Timezone — priority only
print("\n--- Timezone (priority only) ---")
tz_priority_data = test_lookup_list("time_zone", only_priority=True)
test_priority_only_count(tz_priority_data, expected=72)
# 4. Whitelist Test
if SITE_ID_RANDOM != "SET_ME_TO_SITE_ID":
print("\n--- Testing Site Whitelist Policy ---")
# Should return only whitelisted items
print("\n--- Site Whitelist Policy ---")
test_lookup_list("country", site_id=SITE_ID_RANDOM)
test_lookup_list("time_zone", site_id=SITE_ID_RANDOM)
else:
print("\n⚠️ Skipping Phase 2 test: SITE_ID_RANDOM not set.")
# 3. Resolve Test
print("\n--- Testing Resolve ---")
print("\n⚠️ Skipping whitelist test: SITE_ID_RANDOM not set.")
# 5. Resolve
print("\n--- Resolve ---")
test_lookup_resolve("country", "US")
print(f"\n⏱️ Suite completed in {time.time() - start_suite:.2f}s")