Skip to content

Commit ce563ed

Browse files
committed
Return nan if baseline is None and fix warning message
1 parent 77df479 commit ce563ed

File tree

2 files changed

+9
-18
lines changed

2 files changed

+9
-18
lines changed

sdmetrics/single_table/privacy/disclosure_protection.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
CategoricalZeroCAP,
1515
)
1616

17-
MAX_NUM_ROWS = 50000
17+
MAX_NUM_ROWS = 10000
1818

1919
CAP_METHODS = {
2020
'CAP': CategoricalCAP,
@@ -212,7 +212,7 @@ def compute_breakdown(
212212
computation_method = computation_method.upper()
213213
if len(real_data) > MAX_NUM_ROWS or len(synthetic_data) > MAX_NUM_ROWS:
214214
warnings.warn(
215-
f'Data exceeds {MAX_NUM_ROWS} rows, perfomance may be slow.'
215+
f'Data exceeds {MAX_NUM_ROWS} rows, perfomance may be slow. '
216216
'Consider using the `DisclosureProtectionEstimate` for faster computation.'
217217
)
218218

@@ -238,7 +238,7 @@ def compute_breakdown(
238238
)
239239

240240
if baseline_protection == 0:
241-
score = 0 if cap_protection == 0 else 1
241+
score = np.nan
242242
else:
243243
score = min(cap_protection / baseline_protection, 1)
244244

@@ -363,7 +363,7 @@ def _compute_estimated_cap_metric(
363363
estimated_score_sum += estimated_cap_protection
364364
average_computed_score = estimated_score_sum / (i + 1.0)
365365
if baseline_protection == 0:
366-
average_score = 0 if average_computed_score == 0 else 1
366+
average_score = np.nan
367367
else:
368368
average_score = min(average_computed_score / baseline_protection, 1)
369369

tests/unit/single_table/privacy/test_disclosure_protection.py

+5-14
Original file line numberDiff line numberDiff line change
@@ -271,28 +271,19 @@ def test_compute_breakdown_zero_baseline(self, CAPMethodsMock):
271271
CAPMethodsMock.get.return_value = CAPMock
272272

273273
# Run
274-
score_breakdown_with_cap = DisclosureProtection.compute_breakdown(
275-
real_data=real_data,
276-
synthetic_data=synthetic_data,
277-
known_column_names=['col1'],
278-
sensitive_column_names=['col2'],
279-
)
280-
281-
CAPMock._compute.return_value = 0
282-
score_breakdown_no_cap = DisclosureProtection.compute_breakdown(
274+
score_breakdown = DisclosureProtection.compute_breakdown(
283275
real_data=real_data,
284276
synthetic_data=synthetic_data,
285277
known_column_names=['col1'],
286278
sensitive_column_names=['col2'],
287279
)
288280

289281
# Assert
290-
assert score_breakdown_with_cap == {
291-
'score': 1,
282+
assert score_breakdown == {
283+
'score': np.nan,
292284
'baseline_protection': 0,
293285
'cap_protection': 0.5,
294286
}
295-
assert score_breakdown_no_cap == {'score': 0, 'baseline_protection': 0, 'cap_protection': 0}
296287

297288
@patch('sdmetrics.single_table.privacy.disclosure_protection.CAP_METHODS')
298289
@patch(
@@ -323,7 +314,7 @@ def test_compute_breakdown_warns_too_large(
323314

324315
# Run
325316
expected_warning = re.escape(
326-
'Data exceeds 50000 rows, perfomance may be slow.'
317+
'Data exceeds 10000 rows, perfomance may be slow. '
327318
'Consider using the `DisclosureProtectionEstimate` for faster computation.'
328319
)
329320
with pytest.warns(UserWarning, match=expected_warning):
@@ -486,7 +477,7 @@ def test__compute_estimated_cap_metric_zero_baseline(self, CAPMethodsMock):
486477
)
487478

488479
# Assert
489-
assert avg_score == 1
480+
assert np.isnan(avg_score)
490481
assert avg_computed_score == 0.38
491482

492483
@patch(

0 commit comments

Comments
 (0)