Skip to content

Commit 1388a63

Browse files
authored
Merge pull request #118 from washingtonpost/release/2.1.2
Release/2.1.2 🎉
2 parents 610e5e7 + bb0d6c0 commit 1388a63

File tree

5 files changed

+23
-8
lines changed

5 files changed

+23
-8
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 2.1.2 (10/24/2024)
4+
- feat: `agg_model_hard_threshold` now defaults to `True`
5+
- feat: using cross-validation to find the optimal OLS `lambda` in the `BootstrapElectionModel` is now optional: when the `lambda_` model parameter is provided, that value is used instead of running cross-validation [#115](https://github.com/washingtonpost/elex-live-model/pull/115)
6+
37
## 2.1.1 (10/10/2024)
48
- fix: allow multiple `alpha` values passed in to `ModelClient.get_national_summary_votes_estimates()` and change that method to return a `pandas.DataFrame` [#111](https://github.com/washingtonpost/elex-live-model/pull/111)
59

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ Some model types have specific model parameters that can be included.
106106

107107
| Name | Type | Acceptable values | model |
108108
|-----------------------------------|---------|----------------------------------|-----------------|
109-
| lambda | numeric | regularization constant | all |
109+
| lambda_ | numeric | regularization constant | all |
110110
| turnout_factor_lower | numeric | drop units with < turnout factor | all |
111111
| turnout_factor_upper | numeric | drop units with > turnout factor | all |
112112
| robust | boolean | larger prediction intervals | `nonparametric` |

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
LONG_DESCRIPTION = f.read()
2121

2222
# The full version, including alpha/beta/rc tags
23-
RELEASE = "2.1.1"
23+
RELEASE = "2.1.2"
2424
# The short X.Y version
2525
VERSION = ".".join(RELEASE.split(".")[:2])
2626

src/elexmodel/models/BootstrapElectionModel.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@ def __init__(self, model_settings={}):
5555
self.strata = model_settings.get("strata", ["county_classification"]) # columns to stratify the data by
5656
self.T = model_settings.get("T", 5000) # temperature for aggregate model
5757
self.hard_threshold = model_settings.get(
58-
"agg_model_hard_threshold", False
58+
"agg_model_hard_threshold", True
5959
) # use sigmoid or hard threshold when calculating agg model
6060
self.district_election = model_settings.get("district_election", False)
61+
self.lambda_ = model_settings.get("lambda_", None) # regularization parameter for OLS
6162

6263
# upper and lower bounds for the quantile regression which define the strata distributions
6364
# these make sure that we can control the worst cases for the distributions in case we
@@ -807,8 +808,14 @@ def compute_bootstrap_errors(
807808
)
808809

809810
# we use k-fold cross validation to find the optimal lambda for our OLS regression, unless a lambda was provided through the `lambda_` model setting
810-
optimal_lambda_y = self.cv_lambda(x_train, y_train, np.logspace(-3, 2, 20), weights=weights_train)
811-
optimal_lambda_z = self.cv_lambda(x_train, z_train, np.logspace(-3, 2, 20), weights=weights_train)
811+
if self.lambda_ is None:
812+
optimal_lambda_y = self.cv_lambda(x_train, y_train, np.logspace(-3, 2, 20), weights=weights_train)
813+
optimal_lambda_z = self.cv_lambda(x_train, z_train, np.logspace(-3, 2, 20), weights=weights_train)
814+
LOG.info(f"Optimal lambda for y: {optimal_lambda_y}, Optimal lambda for z: {optimal_lambda_z}")
815+
else:
816+
optimal_lambda_y = self.lambda_
817+
optimal_lambda_z = self.lambda_
818+
LOG.info(f"Using user provided lambda: {self.lambda_}")
812819

813820
# step 1) fit the initial model
814821
# we don't want to regularize the intercept or the coefficient for baseline_normalized_margin
@@ -1490,7 +1497,7 @@ def get_national_summary_estimates(self, nat_sum_data_dict: dict, base_to_add: i
14901497

14911498
# we also need a national aggregate point prediction
14921499
if self.hard_threshold:
1493-
aggregate_dem_probs_total = self.aggregate_pred_margin > 0.5
1500+
aggregate_dem_probs_total = self.aggregate_pred_margin > 0
14941501
else:
14951502
aggregate_dem_probs_total = expit(self.T * self.aggregate_pred_margin)
14961503

tests/test_client.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -876,5 +876,9 @@ def test_get_national_summary_votes_estimates(model_client, va_governor_county_d
876876

877877
current = model_client.get_national_summary_votes_estimates(None, 0, [0.99])
878878

879-
pd.testing.assert_frame_equal(current, model_client.results_handler.final_results["nat_sum_data"])
880-
pd.testing.assert_frame_equal(expected_df, model_client.results_handler.final_results["nat_sum_data"])
879+
pd.testing.assert_frame_equal(
880+
current, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False
881+
)
882+
pd.testing.assert_frame_equal(
883+
expected_df, model_client.results_handler.final_results["nat_sum_data"], check_dtype=False
884+
)

0 commit comments

Comments
 (0)