Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/worktrees/agent-a7222b9b
Submodule agent-a7222b9b added at b783d9
1 change: 1 addition & 0 deletions changelog.d/changed/improve-ss-subcomponent-shares.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Replace equal-share (1/4 each) fallback in SS sub-component reconciliation with SSA Fact Sheet proportions (~73% retirement, ~10% disability, ~11% survivors, ~6% dependents) for records where QRF predicts all zeros.
15 changes: 13 additions & 2 deletions policyengine_us_data/datasets/cps/extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,10 @@ def reconcile_ss_subcomponents(predictions, total_ss):
nonzero_rows = row_sums > 0
both = positive_mask & nonzero_rows
shares[both] = values[both] / row_sums[both, np.newaxis]
# If row_sum == 0 but total_ss > 0, distribute equally.
# If row_sum == 0 but total_ss > 0, use SSA aggregate shares.
equal_rows = positive_mask & ~nonzero_rows
shares[equal_rows] = 1.0 / values.shape[1]
ssa_totals = np.array([_SSA_DEFAULT_SHARES[c] for c in predictions.columns])
shares[equal_rows] = ssa_totals / ssa_totals.sum()

out = np.where(
positive_mask[:, np.newaxis],
Expand All @@ -306,6 +307,16 @@ def reconcile_ss_subcomponents(predictions, total_ss):
"social_security_survivors",
}

# Fallback weights for splitting total social_security into its four
# sub-components when QRF predicts zero for every sub-component of a
# record whose total is positive.
#
# Despite the name, the values are NOT normalised shares: they are SSA
# Fact Sheet aggregate benefit totals in dollars (each literal is written
# as "billions x 1e9"). reconcile_ss_subcomponents divides them by their
# sum to obtain the proportions actually applied.
# Source (per the original note): HARD_CODED_TOTALS in utils/loss.py.
_SSA_DEFAULT_SHARES = {
    "social_security_retirement": 1_060e9,  # ~73.0% of the 1,452e9 sum
    "social_security_disability": 148e9,  # ~10.2%
    "social_security_survivors": 160e9,  # ~11.0%
    "social_security_dependents": 84e9,  # ~5.8%
}


def _apply_post_processing(predictions, X_test, time_period, data):
"""Apply retirement constraints and SS reconciliation."""
Expand Down
27 changes: 27 additions & 0 deletions policyengine_us_data/tests/test_extended_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from policyengine_us_data.datasets.cps.extended_cps import (
CPS_ONLY_IMPUTED_VARIABLES,
CPS_STAGE2_INCOME_PREDICTORS,
_SSA_DEFAULT_SHARES,
apply_retirement_constraints,
reconcile_ss_subcomponents,
)
Expand Down Expand Up @@ -250,6 +251,32 @@ def test_single_component_gets_full_total(self):
25000, abs=0.01
)

def test_zero_predictions_use_ssa_shares(self):
    """All-zero QRF predictions must fall back to SSA proportions.

    Builds a single record whose four sub-component predictions are
    all 0.0 but whose total is 10000.0, then checks that each
    reconciled sub-component equals the total multiplied by that
    component's normalised entry in ``_SSA_DEFAULT_SHARES`` rather
    than an equal 1/4 split.
    """
    component_names = [
        "social_security_retirement",
        "social_security_disability",
        "social_security_dependents",
        "social_security_survivors",
    ]
    zero_frame = pd.DataFrame({name: [0.0] for name in component_names})
    result = reconcile_ss_subcomponents(zero_frame, np.array([10000.0]))

    # Derive the expected proportions from the same constant the
    # implementation reads, so the test tracks any update to it.
    weights = np.array([_SSA_DEFAULT_SHARES[name] for name in component_names])
    expected_shares = weights / weights.sum()

    for col, share in zip(component_names, expected_shares):
        observed = result[col].values[0]
        assert observed == pytest.approx(
            10000.0 * share, rel=1e-6
        ), f"{col} share mismatch"

    # Sanity check independent of the exact constants: retirement
    # should dominate (~73%), which an equal split (25%) would fail.
    ret_share = result["social_security_retirement"].values[0] / 10000.0
    assert ret_share > 0.70, f"Retirement share {ret_share:.3f} should be > 0.70"


class TestSequentialQRF:
"""Verify that sequential QRF produces correlated outputs."""
Expand Down
Loading