Skip to content

Commit b073748

Browse files
Improve test data generation for 4-year span with realistic policies
- Extend data range to 4 years (2022-2025) with 450 expenses
- Add annual plan years starting Jan 1st each year:
  - 2022: HSA only
  - 2023: HSA+DCFSA
  - 2024-2025: FSA+DCFSA
  - 2026: FSA (future year)
- HSA expenses always remain Open (never reimbursed)
- Cap FSA/DCFSA reimbursements at election amounts
- Add HSA balance history with proper timestamps showing $4k/year contributions
- Reduce average expense to ~$89 for ~$10k/year total

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent: c501bb6 · commit: b073748

File tree

1 file changed

+136
-46
lines changed

1 file changed

+136
-46
lines changed

scripts/generate_test_data.py

Lines changed: 136 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -236,9 +236,9 @@ def create_placeholder_pdf_bytes(expense_data=None):
236236
PLACEHOLDER_IMAGE = None # Will be generated on first use
237237

238238
# Configuration
239-
NUM_EXPENSES = 350
239+
NUM_EXPENSES = 450 # Increased for 4 years of data
240240
START_DATE = datetime(2022, 1, 1)
241-
END_DATE = datetime(2024, 12, 31)
241+
END_DATE = datetime(2025, 12, 31) # 4 years of expenses
242242

243243
# Sample data
244244
PROVIDERS = [
@@ -382,40 +382,78 @@ def generate_test_data():
382382
'user_id': 1
383383
})
384384

385-
# Create plan years with a mix of plan types:
386-
# 2022: HSA only (no FSA/DCFSA)
387-
# 2023: HSA+DCFSA combined (HSA with dependent care)
388-
# 2024: FSA+DCFSA combined (no HSA, using FSA instead)
385+
# Create plan years - ONE policy type per year cycle, starting January 1st:
386+
# Policy types: HSA, HSA+DCFSA, FSA, or FSA+DCFSA
387+
# User saves $4,000/year to HSA when HSA-eligible (HSA or HSA+DCFSA policy)
388+
#
389+
# 2022: HSA (simple HSA only)
390+
# 2023: HSA+DCFSA (HSA with dependent care)
391+
# 2024: FSA+DCFSA (switched to FSA)
392+
# 2025: FSA+DCFSA (continuing FSA)
393+
# 2026: FSA (future year policy - no DCFSA)
389394
plan_years = []
390395
plan_year_id = 1
391396

392-
# 2022: No FSA/DCFSA plans - HSA only year
393-
# (HSA expenses don't need a plan_year_id)
397+
# 2022: HSA only
398+
plan_years.append({
399+
'id': plan_year_id,
400+
'user_id': 1,
401+
'plan_type': 'HSA',
402+
'plan_year_start': '2022-01-01',
403+
'plan_year_end': '2022-12-31',
404+
'election_amount': None, # HSA doesn't use election_amount
405+
'dcfsa_election_amount': None # No DCFSA this year
406+
})
407+
plan_year_id += 1
394408

395-
# 2023: HSA+DCFSA combined plan
409+
# 2023: HSA+DCFSA
396410
plan_years.append({
397411
'id': plan_year_id,
398412
'user_id': 1,
399413
'plan_type': 'HSA+DCFSA',
400414
'plan_year_start': '2023-01-01',
401415
'plan_year_end': '2023-12-31',
402-
'election_amount': 3850.00, # HSA family contribution limit 2023
416+
'election_amount': None, # HSA doesn't use election_amount
403417
'dcfsa_election_amount': 5000.00 # DCFSA election
404418
})
405419
plan_year_id += 1
406420

407-
# 2024: FSA+DCFSA combined plan (switched from HSA to FSA)
421+
# 2024: FSA+DCFSA (switched from HSA to FSA)
408422
plan_years.append({
409423
'id': plan_year_id,
410424
'user_id': 1,
411425
'plan_type': 'FSA+DCFSA',
412426
'plan_year_start': '2024-01-01',
413427
'plan_year_end': '2024-12-31',
414-
'election_amount': 3050.00, # FSA limit 2024
428+
'election_amount': 3200.00, # FSA election
415429
'dcfsa_election_amount': 5000.00 # DCFSA election
416430
})
417431
plan_year_id += 1
418432

433+
# 2025: FSA+DCFSA (current year)
434+
plan_years.append({
435+
'id': plan_year_id,
436+
'user_id': 1,
437+
'plan_type': 'FSA+DCFSA',
438+
'plan_year_start': '2025-01-01',
439+
'plan_year_end': '2025-12-31',
440+
'election_amount': 3300.00, # FSA election
441+
'dcfsa_election_amount': 5000.00 # DCFSA election
442+
})
443+
plan_year_id += 1
444+
445+
# 2026: FSA only (future year)
446+
plan_years.append({
447+
'id': plan_year_id,
448+
'user_id': 1,
449+
'plan_type': 'FSA',
450+
'plan_year_start': '2026-01-01',
451+
'plan_year_end': '2026-12-31',
452+
'election_amount': 3400.00, # FSA election
453+
'dcfsa_election_amount': None # No DCFSA this year
454+
})
455+
plan_year_id += 1
456+
419457
# Build plan year lookup
420458
# HSA expenses don't need plan_year_id (always NULL)
421459
# FSA expenses map to FSA or FSA+DCFSA plans
@@ -431,11 +469,31 @@ def generate_test_data():
431469
elif plan_type == 'HSA+DCFSA':
432470
# HSA expenses don't use plan_year_id, only DCFSA does
433471
plan_year_lookup[('DCFSA', year)] = py['id']
472+
elif plan_type == 'HSA':
473+
# HSA-only plan - HSA expenses don't use plan_year_id
474+
pass
434475
elif plan_type == 'FSA':
435476
plan_year_lookup[('FSA', year)] = py['id']
436477
elif plan_type == 'DCFSA':
437478
plan_year_lookup[('DCFSA', year)] = py['id']
438479

480+
# Build election amount lookup by (category, year)
481+
# FSA/DCFSA reimbursements cannot exceed election amounts
482+
election_lookup = {}
483+
for py in plan_years:
484+
year = int(py['plan_year_start'][:4])
485+
plan_type = py['plan_type']
486+
487+
if plan_type in ['FSA', 'FSA+DCFSA']:
488+
if py['election_amount']:
489+
election_lookup[('FSA', year)] = py['election_amount']
490+
if plan_type in ['DCFSA', 'FSA+DCFSA', 'HSA+DCFSA']:
491+
if py['dcfsa_election_amount']:
492+
election_lookup[('DCFSA', year)] = py['dcfsa_election_amount']
493+
494+
# Track cumulative reimbursements per (category, year)
495+
reimbursement_totals = {}
496+
439497
# Generate expenses
440498
expenses = []
441499
attachments = []
@@ -451,36 +509,64 @@ def generate_test_data():
451509
date_str = date_of_service.strftime('%Y-%m-%d')
452510
year = date_of_service.year
453511

454-
# Category depends on year and available plans:
455-
# 2022: HSA only (no FSA/DCFSA plans available)
456-
# 2023: HSA + DCFSA (HSA+DCFSA plan, no FSA)
457-
# 2024: FSA + DCFSA (FSA+DCFSA plan, no HSA)
512+
# Category depends on year's policy type:
513+
# 2022: HSA only
514+
# 2023: HSA+DCFSA (can use HSA or DCFSA)
515+
# 2024-2025: FSA+DCFSA (can use FSA or DCFSA)
458516
if year == 2022:
459-
# HSA only year
517+
# HSA only policy
460518
category = 'HSA'
461519
elif year == 2023:
462-
# HSA+DCFSA plan - can use HSA or DCFSA
520+
# HSA+DCFSA policy - can use HSA or DCFSA
463521
category = random.choices(['HSA', 'DCFSA'], weights=[0.7, 0.3])[0]
464-
else: # 2024
465-
# FSA+DCFSA plan - can use FSA or DCFSA
522+
else: # 2024, 2025
523+
# FSA+DCFSA policy - can use FSA or DCFSA
466524
category = random.choices(['FSA', 'DCFSA'], weights=[0.7, 0.3])[0]
467525

468-
# Random status with weights
469-
status = random.choices(STATUSES, weights=STATUS_WEIGHTS)[0]
470-
471-
# Random amount
472-
if random.random() < 0.3:
473-
amount = round(random.uniform(5, 50), 2)
474-
elif random.random() < 0.7:
475-
amount = round(random.uniform(50, 300), 2)
526+
# Status depends on category - HSA expenses are never reimbursed
527+
if category == 'HSA':
528+
status = 'Open'
529+
else:
530+
status = random.choices(STATUSES, weights=STATUS_WEIGHTS)[0]
531+
532+
# Random amount - targeting ~$10k/year average across all expenses
533+
# With ~112 expenses/year, avg expense should be ~$89
534+
rand_val = random.random()
535+
if rand_val < 0.50:
536+
amount = round(random.uniform(10, 50), 2) # 50%: small expenses
537+
elif rand_val < 0.85:
538+
amount = round(random.uniform(50, 150), 2) # 35%: medium expenses
476539
else:
477-
amount = round(random.uniform(300, 2000), 2)
540+
amount = round(random.uniform(150, 350), 2) # 15%: larger expenses
541+
542+
# Calculate reimbursement - HSA expenses are never reimbursed
543+
# FSA/DCFSA reimbursements cannot exceed election amounts
544+
if category == 'HSA':
545+
amount_inflow = 0
546+
elif category in ['FSA', 'DCFSA']:
547+
# Check election limit
548+
election_key = (category, year)
549+
election_limit = election_lookup.get(election_key, 0)
550+
current_total = reimbursement_totals.get(election_key, 0)
551+
remaining = election_limit - current_total
552+
553+
if remaining <= 0:
554+
# Election exhausted - no more reimbursements this year
555+
status = 'Open'
556+
amount_inflow = 0
557+
elif status in ['Reimbursed', 'Closed']:
558+
amount_inflow = min(amount, remaining)
559+
if amount_inflow < amount:
560+
status = 'Partially Reimbursed'
561+
elif status == 'Partially Reimbursed':
562+
desired = round(amount * random.uniform(0.3, 0.8), 2)
563+
amount_inflow = min(desired, remaining)
564+
else:
565+
amount_inflow = 0
478566

479-
# Calculate reimbursement
480-
if status in ['Reimbursed', 'Closed']:
481-
amount_inflow = amount
482-
elif status == 'Partially Reimbursed':
483-
amount_inflow = round(amount * random.uniform(0.3, 0.8), 2)
567+
# Track the reimbursement
568+
if amount_inflow > 0:
569+
reimbursement_totals[election_key] = current_total + amount_inflow
484570
else:
485571
amount_inflow = 0
486572

@@ -582,22 +668,26 @@ def generate_test_data():
582668
if expense_id % 50 == 0:
583669
print(f" Generated {expense_id}/{NUM_EXPENSES} expenses...")
584670

585-
# HSA balance history - realistic progression over years
671+
# HSA balance history - realistic progression over 4 years
672+
# User saves $4,000/year to HSA when HSA-eligible (2022 HSA, 2023 HSA+DCFSA)
673+
# 2024-2025 switched to FSA, so no new HSA contributions, but balance carries over
674+
# HSA expenses remain open (not reimbursed) - balance is available for future use
586675
hsa_balance_history = []
587676
history_id = 1
588677
running_balance = 0
678+
# (amount, change_type, notes, date)
679+
# Add entries each year so the chart shows the balance over time
589680
transactions = [
590-
(5000.00, 'initial_balance', 'Initial HSA rollover from previous employer'),
591-
(3850.00, 'contribution', '2022 annual HSA contribution'),
592-
(1500.00, 'contribution', '2022 employer contribution'),
593-
(3850.00, 'contribution', '2023 annual HSA contribution'),
594-
(1500.00, 'contribution', '2023 employer contribution'),
595-
(1000.00, 'contribution', '2023 catch-up contribution'),
596-
(-500.00, 'reimbursement_added', 'HSA reimbursement for dental work'),
597-
(1200.00, 'contribution', '2024 partial year contribution'),
681+
(2500.00, 'initial_balance', 'Initial HSA rollover from previous employer', '2022-01-01'),
682+
(4000.00, 'contribution', '2022 annual HSA contribution', '2022-01-15'),
683+
(4000.00, 'contribution', '2023 annual HSA contribution', '2023-01-15'),
684+
# No HSA contributions 2024-2025 (switched to FSA plan)
685+
# Add $0 entries to show balance carrying over unchanged
686+
(0.00, 'contribution', '2024 - No HSA contribution (FSA plan year)', '2024-01-15'),
687+
(0.00, 'contribution', '2025 - No HSA contribution (FSA plan year)', '2025-01-15'),
598688
]
599689

600-
for amount, change_type, notes in transactions:
690+
for amount, change_type, notes, date_str in transactions:
601691
previous = running_balance
602692
running_balance += amount
603693
hsa_balance_history.append({
@@ -609,11 +699,11 @@ def generate_test_data():
609699
'change_type': change_type,
610700
'related_expense_id': None,
611701
'notes': notes,
612-
'recorded_at': datetime.now().isoformat()
702+
'recorded_at': f'{date_str}T12:00:00'
613703
})
614704
history_id += 1
615705

616-
# Final balance should be ~$17,400
706+
# Final balance should be $10,500
617707
final_hsa_balance = running_balance
618708

619709
return {

0 commit comments

Comments
 (0)