Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a link to the time series masking blog post from docstrings #744

Merged
merged 2 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dianna/methods/lime_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from lime import explanation
from lime import lime_base
from dianna import utils
from dianna.utils.maskers import generate_masks
from dianna.utils.maskers import generate_time_series_masks
from dianna.utils.maskers import mask_data
from dianna.utils.predict import make_predictions

Expand Down Expand Up @@ -81,7 +81,9 @@ def explain(
# wrap up the input model or function using the runner
runner = utils.get_function(
model_or_function, preprocess_function=self.preprocess_function)
masks = generate_masks(input_timeseries, num_samples, p_keep=0.1)
masks = generate_time_series_masks(input_timeseries,
num_samples,
p_keep=0.1)
# NOTE: Required by `lime_base` explainer since the first instance must be the original data
# For more details, check this link
# https://github.com/marcotcr/lime/blob/fd7eb2e6f760619c29fca0187c07b82157601b32/lime/lime_base.py#L148
Expand Down
10 changes: 5 additions & 5 deletions dianna/methods/rise_timeseries.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Optional
import numpy as np
from dianna import utils
from dianna.utils.maskers import generate_masks
from dianna.utils.maskers import generate_time_series_masks
from dianna.utils.maskers import mask_data
from dianna.utils.predict import make_predictions
from dianna.utils.rise_utils import normalize
Expand Down Expand Up @@ -68,10 +68,10 @@ def explain(self,
runner = utils.get_function(
model_or_function, preprocess_function=self.preprocess_function)

masks = generate_masks(input_timeseries,
number_of_masks=self.n_masks,
feature_res=self.feature_res,
p_keep=self.p_keep)
masks = generate_time_series_masks(input_timeseries,
number_of_masks=self.n_masks,
feature_res=self.feature_res,
p_keep=self.p_keep)
self.masks = masks if self.keep_masks else None
masked = mask_data(input_timeseries, masks, mask_type=mask_type)
self.masked = masked if self.keep_masked_data else None
Expand Down
18 changes: 16 additions & 2 deletions dianna/utils/maskers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,24 @@
from skimage.transform import resize


def generate_masks(
def generate_time_series_masks(
input_data: np.ndarray,
number_of_masks: int,
feature_res: int = 8,
p_keep: float = 0.5,
):
"""Generate masks for time series data given a probability of keeping any time step or channel unmasked.

Note that, for multivariate data, the resulting masks will be an evenly distributed sample of the following 3 kinds
of masks:
- Channel masks. These are masks that mask a whole channel at a time, masking all its time steps simultaneously.
- Time step masks. These mask all channels simultaneously for selected time steps. Masked time steps are selected
randomly, while grouping adjacent time steps. For a complete description of how we generate time step masks, see
our blog post: https://medium.com/escience-center/masking-time-series-for-explainable-ai-90247ac252b4
- Combination masks. These masks are a combination of the two types above.

For univariate data, only time step masks are returned.

Args:
input_data: Timeseries data to be masked.
number_of_masks: Number of masks to generate.
Expand Down Expand Up @@ -124,7 +134,11 @@ def _determine_number_masked(p_keep: float, series_length: int) -> int:

def generate_time_step_masks(input_data: np.ndarray, number_of_masks: int,
p_keep: float, number_of_features: int):
"""Generate masks that masks complete time steps at a time while masking time steps in a segmented fashion."""
"""Generate masks that mask complete time steps at a time while masking time steps in a segmented fashion.

For a conceptual description see:
https://medium.com/escience-center/masking-time-series-for-explainable-ai-90247ac252b4.
"""
time_series_length = input_data.shape[0]
number_of_channels = input_data.shape[1]

Expand Down
15 changes: 8 additions & 7 deletions tests/methods/test_lime_timeseries.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest import TestCase
import numpy as np
from dianna.methods.lime_timeseries import LIMETimeseries
from dianna.utils.maskers import generate_masks
from dianna.utils.maskers import generate_time_series_masks
from dianna.utils.maskers import mask_data
from tests.utils import run_model

Expand All @@ -17,8 +17,8 @@ def test_lime_timeseries_correct_output_shape(self):
exp = explainer.explain(
run_model,
input_data,
labels=(0,),
class_names=("test",),
labels=(0, ),
class_names=("test", ),
num_features=num_features,
num_samples=10,
num_slices=10,
Expand All @@ -30,12 +30,13 @@ def test_distance_shape(self):
"""Test the shape of returned distance array."""
dummy_timeseries = np.random.random((50, 1))
number_of_masks = 50
masks = generate_masks(dummy_timeseries, number_of_masks, p_keep=0.9)
masks = generate_time_series_masks(dummy_timeseries,
number_of_masks,
p_keep=0.9)
masked = mask_data(dummy_timeseries, masks, mask_type="mean")
explainer = LIMETimeseries()
distance = explainer._calculate_distance(
masked.reshape((-1, 50)), distance_method="cosine"
)
distance = explainer._calculate_distance(masked.reshape((-1, 50)),
distance_method="cosine")
assert len(distance) == number_of_masks

def test_euclidean_distance(self):
Expand Down
37 changes: 19 additions & 18 deletions tests/methods/test_maskers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dianna.utils.maskers import generate_channel_masks
from dianna.utils.maskers import generate_interpolated_float_masks_for_image
from dianna.utils.maskers import generate_interpolated_float_masks_for_timeseries
from dianna.utils.maskers import generate_masks
from dianna.utils.maskers import generate_time_series_masks
from dianna.utils.maskers import generate_time_step_masks
from dianna.utils.maskers import mask_data

Expand All @@ -14,7 +14,7 @@ def test_mask_has_correct_shape_univariate():
input_data = _get_univariate_time_series()
number_of_masks = 5

result = generate_masks(input_data, number_of_masks)
result = generate_time_series_masks(input_data, number_of_masks)

assert result.shape == tuple([number_of_masks] + list(input_data.shape))

Expand All @@ -24,7 +24,8 @@ def test_mask_has_correct_type_univariate():
input_data = _get_univariate_time_series()
number_of_masks = 5

result = generate_masks(input_data, number_of_masks=number_of_masks)
result = generate_time_series_masks(input_data,
number_of_masks=number_of_masks)

assert result.dtype == bool

Expand Down Expand Up @@ -122,10 +123,10 @@ def _call_masking_function(
feature_res=5,
):
"""Helper function with some defaults to call the code under test."""
masks = generate_masks(input_data,
number_of_masks,
feature_res,
p_keep=p_keep)
masks = generate_time_series_masks(input_data,
number_of_masks,
feature_res,
p_keep=p_keep)
return mask_data(input_data, masks, mask_type=mask_type)


Expand Down Expand Up @@ -174,7 +175,7 @@ def test_masking_has_correct_shape_multivariate():
number_of_masks = 15
input_data = _get_multivariate_time_series()

result = generate_masks(input_data, number_of_masks)
result = generate_time_series_masks(input_data, number_of_masks)

assert result.shape == tuple([number_of_masks] + list(input_data.shape))

Expand All @@ -184,7 +185,7 @@ def test_masking_univariate_leaves_anything_unmasked():
number_of_masks = 1
input_data = _get_univariate_time_series()

result = generate_masks(input_data, number_of_masks)
result = generate_time_series_masks(input_data, number_of_masks)

assert np.any(result)
assert np.any(~result)
Expand All @@ -201,7 +202,7 @@ def test_masking_keep_first_instance():
"""
input_data = _get_multivariate_time_series()
number_of_masks = 5
masks = generate_masks(input_data, number_of_masks, p_keep=0.9)
masks = generate_time_series_masks(input_data, number_of_masks, p_keep=0.9)
masks[0, :, :] = 1.0
masked = mask_data(input_data, masks, mask_type="mean")
assert np.array_equal(masked[0, :, :], input_data)
Expand All @@ -215,10 +216,10 @@ def test_masks_approximately_correct_number_of_masked_parts_per_time_step(
number_of_masks = 500
input_data = _get_univariate_time_series(num_steps=num_steps)

masks = generate_masks(input_data,
number_of_masks=number_of_masks,
feature_res=num_steps,
p_keep=p_keep)[:, :, 0]
masks = generate_time_series_masks(input_data,
number_of_masks=number_of_masks,
feature_res=num_steps,
p_keep=p_keep)[:, :, 0]

masks_mean = DataFrame(masks).mean()
print('\n')
Expand All @@ -234,10 +235,10 @@ def test_masks_approximately_correct_number_of_masked_parts_per_time_step_projec
number_of_masks = 500
input_data = _get_univariate_time_series(num_steps=num_steps)

masks = generate_masks(input_data,
number_of_masks=number_of_masks,
feature_res=6,
p_keep=p_keep)[:, :, 0]
masks = generate_time_series_masks(input_data,
number_of_masks=number_of_masks,
feature_res=6,
p_keep=p_keep)[:, :, 0]
print_univariate_masks(masks[:5])

masks_mean = DataFrame(masks).mean()
Expand Down
Loading