Skip to content

Commit 3841444

Browse files
committed
Add missing comment section
1 parent cafc4eb commit 3841444

File tree

1 file changed

+14 -3 lines changed

1 file changed

+14 -3 lines changed

tests/integration/single_table/privacy/test_dcr_overfitting_protection.py

+14 -3
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ def test_end_to_end_with_demo(self):
6161

6262
def test_compute_breakdown_drop_all_columns(self):
6363
"""Testing invalid sdtypes and ensure only appropriate columns are measured."""
64+
# Setup
6465
train_data = pd.DataFrame({'bad_col': [10.0, 15.0], 'num_col': [1.0, 2.0]})
6566
synth_data = pd.DataFrame({'bad_col': [2.0, 1.0], 'num_col': [1.0, 2.0]})
6667
holdout_data = pd.DataFrame({'bad_col': [2.0, 1.0], 'num_col': [3.0, 4.0]})
@@ -71,9 +72,12 @@ def test_compute_breakdown_drop_all_columns(self):
7172
}
7273
}
7374

75+
# Run
7476
result = DCROverfittingProtection.compute_breakdown(
7577
train_data, synth_data, holdout_data, metadata
7678
)
79+
80+
# Assert
7781
assert result['score'] == 0.0
7882
assert result['synthetic_data_percentages']['closer_to_training'] == 1.0
7983
assert result['synthetic_data_percentages']['closer_to_holdout'] == 0.0
@@ -104,24 +108,31 @@ def test_compute_breakdown_subsampling(self):
104108
assert compute_full_1 == compute_full_2
105109

106110
def test_compute_breakdown_iterations(self):
107-
"""Test that number iterations for subsampling affect results."""
111+
"""Test that number iterations for subsampling works as expected."""
108112
# Setup
109113
train_data = pd.DataFrame({'num_col': [random.randint(1, 1000) for _ in range(10)]})
110114
holdout_data = pd.DataFrame({'num_col': [random.randint(1, 1000) for _ in range(10)]})
111115
synthetic_data = pd.DataFrame({'num_col': [random.randint(1, 1000) for _ in range(10)]})
112116
metadata = {'columns': {'num_col': {'sdtype': 'numerical'}}}
113117
num_rows_subsample = 3
118+
num_iterations = 1000
114119

115120
# Run
116121
compute_num_iteration_1 = DCROverfittingProtection.compute_breakdown(
117122
train_data, synthetic_data, holdout_data, metadata, num_rows_subsample, 1
118123
)
119124
compute_num_iteration_1000 = DCROverfittingProtection.compute_breakdown(
120-
train_data, synthetic_data, holdout_data, metadata, num_rows_subsample, 1000
125+
train_data, synthetic_data, holdout_data, metadata, num_rows_subsample, num_iterations
121126
)
122127
compute_train_same = DCROverfittingProtection.compute_breakdown(
123-
synthetic_data, synthetic_data, holdout_data, metadata, num_rows_subsample, 1000
128+
synthetic_data,
129+
synthetic_data,
130+
holdout_data,
131+
metadata,
132+
num_rows_subsample,
133+
num_iterations,
124134
)
125135

136+
# Assert
126137
assert compute_num_iteration_1 != compute_num_iteration_1000
127138
assert compute_train_same['score'] == 0.0

0 commit comments

Comments (0)