@@ -52,7 +52,7 @@ class BootstrapElectionModel(BaseElectionModel):
52
52
and the epsilons are contest (state/district) level random effects.
53
53
"""
54
54
55
- def __init__ (self , model_settings = {}, versioned_data_handler = None ):
55
+ def __init__ (self , model_settings = {}, versioned_data_handler = None , pres_predictions = None ):
56
56
super ().__init__ (model_settings )
57
57
self .B = model_settings .get ("B" , 500 ) # number of bootstrap samples
58
58
self .strata = model_settings .get ("strata" , ["county_classification" ]) # columns to stratify the data by
@@ -61,6 +61,7 @@ def __init__(self, model_settings={}, versioned_data_handler=None):
61
61
"agg_model_hard_threshold" , True
62
62
) # use sigmoid or hard threshold when calculating agg model
63
63
self .district_election = model_settings .get ("district_election" , False )
64
+
64
65
self .lambda_ = model_settings .get ("lambda_" , None ) # regularization parameter for OLS
65
66
66
67
# save versioned data for later use
@@ -70,6 +71,10 @@ def __init__(self, model_settings={}, versioned_data_handler=None):
70
71
self .extrapolate_std_method = model_settings .get ("extrapolate_std_method" , "std" )
71
72
self .max_dist_to_observed = model_settings .get ("max_dist_to_observed" , 5 )
72
73
74
+ # save presidential predictions for later use
75
+ self .pres_predictions = pres_predictions
76
+ self .correct_from_presidential = model_settings .get ("correct_from_presidential" , False )
77
+
73
78
# upper and lower bounds for the quantile regression which define the strata distributions
74
79
# these make sure that we can control the worst cases for the distributions in case we
75
80
# haven't seen enough data yet
@@ -1283,6 +1288,49 @@ def compute_bootstrap_errors(
1283
1288
extrap_filter
1284
1289
]
1285
1290
1291
+ if self .correct_from_presidential :
1292
+ nonreporting_units ["geographic_unit_fips_p" ] = nonreporting_units .geographic_unit_fips .apply (
1293
+ lambda x : x .split ("_" )[1 ]
1294
+ )
1295
+ nonreporting_units = nonreporting_units .merge (
1296
+ self .pres_predictions ,
1297
+ left_on = "geographic_unit_fips_p" ,
1298
+ right_on = "geographic_unit_fips" ,
1299
+ how = "left" ,
1300
+ suffixes = ("" , "_pres" ),
1301
+ )
1302
+
1303
+ # adjust results_normalized_margin_pres to account for split counties
1304
+
1305
+ nonreporting_units ["margin_adj" ] = (
1306
+ nonreporting_units .baseline_normalized_margin - nonreporting_units .baseline_normalized_margin_pres
1307
+ )
1308
+
1309
+ nonreporting_units ["results_normalized_margin_pres" ] = (
1310
+ nonreporting_units .results_margin_pres / nonreporting_units .results_weights_pres
1311
+ + nonreporting_units .margin_adj
1312
+ )
1313
+ nonreporting_units ["pred_normalized_margin_pres" ] = (
1314
+ nonreporting_units .pred_margin / nonreporting_units .pred_turnout + nonreporting_units .margin_adj
1315
+ )
1316
+
1317
+ nonreporting_units ["pred_normalized_margin" ] = np .mean (
1318
+ y_test_pred_B .clip (min = y_partial_reporting_lower , max = y_partial_reporting_upper ), axis = 1
1319
+ )
1320
+
1321
+ nonreporting_units ["margin_gap" ] = (
1322
+ nonreporting_units .results_normalized_margin - nonreporting_units .results_normalized_margin_pres
1323
+ )
1324
+
1325
+ nonreporting_units ["pred_normalized_margin_new" ] = (
1326
+ nonreporting_units .pred_normalized_margin_pres + nonreporting_units .margin_gap
1327
+ )
1328
+ adjustment = (
1329
+ nonreporting_units ["pred_normalized_margin_new" ].values
1330
+ - nonreporting_units ["pred_normalized_margin" ].values
1331
+ )
1332
+ y_test_pred_B [~ np .isnan (adjustment )] += adjustment [~ np .isnan (adjustment )].reshape (- 1 , 1 )
1333
+
1286
1334
y_test_pred_B = y_test_pred_B .clip (min = y_partial_reporting_lower , max = y_partial_reporting_upper )
1287
1335
1288
1336
# \tilde{y_i}^{b} * \tilde{z_i}^{b}
0 commit comments