Added cutout support for all video data

f4str · f4str · commit 1db4e0a0799a · 2022-10-27T17:56:02.000-07:00
Signed-off-by: Farhan Ahmed <Farhan.Ahmed@ibm.com>
diff --git a/art/defences/preprocessor/cutout/cutout.py b/art/defences/preprocessor/cutout/cutout.py
@@ -78,24 +78,35 @@ def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.nd
         """
         Apply Cutout data augmentation to sample `x`.
 
-        :param x: Sample to compress with shape of `NCHW` or `NHWC`. The `x` values are expected to be in
-                  the data range [0, 1] or [0, 255].
+        :param x: Sample to cut out with shape of `NCHW`, `NHWC`, `NCFHW` or `NFHWC`.
+                  `x` values are expected to be in the data range [0, 1] or [0, 255].
         :param y: Labels of the sample `x`. This function does not affect them in any way.
         :return: Data augmented sample.
         """
         x_ndim = len(x.shape)
 
+        # NCHW/NCFHW/NFHWC --> NHWC
         if x_ndim == 4:
             if self.channels_first:
-                # NCHW
-                n, _, height, width = x.shape
+                # NCHW --> NHWC
+                x_nhwc = np.transpose(x, (0, 2, 3, 1))
             else:
-                # NHWC
-                n, height, width, _ = x.shape
+                x_nhwc = x
+        elif x_ndim == 5:
+            if self.channels_first:
+                # NCFHW --> NFHWC --> NHWC
+                nb_clips, channels, clip_size, height, width = x.shape
+                x_nfhwc = np.transpose(x, (0, 2, 3, 4, 1))
+                x_nhwc = np.reshape(x_nfhwc, (nb_clips * clip_size, height, width, channels))
+            else:
+                # NFHWC --> NHWC
+                nb_clips, clip_size, height, width, channels = x.shape
+                x_nhwc = np.reshape(x, (nb_clips * clip_size, height, width, channels))
         else:
-            raise ValueError("Unrecognized input dimension. Cutout can only be applied to image data.")
+            raise ValueError("Unrecognized input dimension. Cutout can only be applied to image and video data.")
 
-        masks = np.ones_like(x)
+        n, height, width, _ = x_nhwc.shape
+        masks = np.ones_like(x_nhwc)
 
         # generate a random bounding box per image
         for idx in trange(n, desc="Cutout", disable=not self.verbose):
@@ -108,12 +119,25 @@ def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.nd
             bbx2 = np.clip(center_x + self.length // 2, 0, width)
 
             # zero out the bounding box
+            masks[idx, bbx1:bbx2, bby1:bby2, :] = 0
+
+        x_nhwc = x_nhwc * masks
+
+        # NCHW/NCFHW/NFHWC <-- NHWC
+        if x_ndim == 4:
             if self.channels_first:
-                masks[idx, :, bbx1:bbx2, bby1:bby2] = 0
+                # NCHW <-- NHWC
+                x_aug = np.transpose(x_nhwc, (0, 3, 1, 2))
             else:
-                masks[idx, bbx1:bbx2, bby1:bby2, :] = 0
-
-        x_aug = x * masks
+                x_aug = x_nhwc
+        elif x_ndim == 5:  # lgtm [py/redundant-comparison]
+            if self.channels_first:
+                # NCFHW <-- NFHWC <-- NHWC
+                x_nfhwc = np.reshape(x_nhwc, (nb_clips, clip_size, height, width, channels))
+                x_aug = np.transpose(x_nfhwc, (0, 4, 1, 2, 3))
+            else:
+                # NFHWC <-- NHWC
+                x_aug = np.reshape(x_nhwc, (nb_clips, clip_size, height, width, channels))
 
         return x_aug, y
 
diff --git a/art/defences/preprocessor/cutout/cutout_pytorch.py b/art/defences/preprocessor/cutout/cutout_pytorch.py
@@ -72,6 +72,9 @@ def __init__(
         :param device_type: Type of device on which the classifier is run, either `gpu` or `cpu`.
         :param verbose: Show progress bars.
         """
+        import torch  # lgtm [py/repeated-import]
+        from torch.autograd import Function
+
         super().__init__(
             device_type=device_type,
             is_fitted=True,
@@ -83,50 +86,89 @@ def __init__(
         self.verbose = verbose
         self._check_params()
 
+        class RandomCutout(Function):  # pylint: disable=W0223
+            """
+            Function running Preprocessor.
+            """
+
+            @staticmethod
+            def forward(ctx, input):  # pylint: disable=W0622,W0221
+                ctx.save_for_backward(input)
+                n, _, height, width = input.shape
+                masks = torch.ones_like(input)
+
+                # generate a random bounding box per image
+                for idx in trange(n, desc="Cutout", disable=not self.verbose):
+                    # uniform sampling
+                    center_x = torch.randint(0, height, (1,))
+                    center_y = torch.randint(0, width, (1,))
+                    bby1 = torch.clamp(center_y - self.length // 2, 0, height)
+                    bbx1 = torch.clamp(center_x - self.length // 2, 0, width)
+                    bby2 = torch.clamp(center_y + self.length // 2, 0, height)
+                    bbx2 = torch.clamp(center_x + self.length // 2, 0, width)
+
+                    # zero out the bounding box
+                    masks[idx, :, bbx1:bbx2, bby1:bby2] = 0  # type: ignore
+
+                return input * masks
+
+            @staticmethod
+            def backward(ctx, grad_output):  # pylint: disable=W0221
+                return grad_output
+
+        self._random_cutout = RandomCutout
+
     def forward(
         self, x: "torch.Tensor", y: Optional["torch.Tensor"] = None
     ) -> Tuple["torch.Tensor", Optional["torch.Tensor"]]:
         """
         Apply Cutout data augmentation to sample `x`.
 
-        :param x: Sample to compress with shape of `NCHW` or `NHWC`. The `x` values are expected to be in
-                  the data range [0, 1] or [0, 255].
+        :param x: Sample to cut out with shape of `NCHW`, `NHWC`, `NCFHW` or `NFHWC`.
+                  `x` values are expected to be in the data range [0, 1] or [0, 255].
         :param y: Labels of the sample `x`. This function does not affect them in any way.
         :return: Data augmented sample.
         """
-        import torch  # lgtm [py/repeated-import]
-
         x_ndim = len(x.shape)
 
+        # NHWC/NCFHW/NFHWC --> NCHW.
         if x_ndim == 4:
             if self.channels_first:
-                # NCHW
-                n, _, height, width = x.shape
+                x_nchw = x
             else:
-                # NHWC
-                n, height, width, _ = x.shape
+                # NHWC --> NCHW
+                x_nchw = x.permute(0, 3, 1, 2)
+        elif x_ndim == 5:
+            if self.channels_first:
+                # NCFHW --> NFCHW --> NCHW
+                nb_clips, channels, clip_size, height, width = x.shape
+                x_nchw = x.permute(0, 2, 1, 3, 4).reshape(nb_clips * clip_size, channels, height, width)
+            else:
+                # NFHWC --> NHWC --> NCHW
+                nb_clips, clip_size, height, width, channels = x.shape
+                x_nchw = x.reshape(nb_clips * clip_size, height, width, channels).permute(0, 3, 1, 2)
         else:
-            raise ValueError("Unrecognized input dimension. Cutout can only be applied to image data.")
-
-        masks = torch.ones_like(x)
+            raise ValueError("Unrecognized input dimension. Cutout can only be applied to image and video data.")
 
-        # generate a random bounding box per image
-        for idx in trange(n, desc="Cutout", disable=not self.verbose):
-            # uniform sampling
-            center_x = torch.randint(0, height, (1,))
-            center_y = torch.randint(0, width, (1,))
-            bby1 = torch.clamp(center_y - self.length // 2, 0, height)
-            bbx1 = torch.clamp(center_x - self.length // 2, 0, width)
-            bby2 = torch.clamp(center_y + self.length // 2, 0, height)
-            bbx2 = torch.clamp(center_x + self.length // 2, 0, width)
+        # apply random cutout
+        x_nchw = self._random_cutout.apply(x_nchw)
 
-            # zero out the bounding box
+        # NHWC/NCFHW/NFHWC <-- NCHW.
+        if x_ndim == 4:
             if self.channels_first:
-                masks[idx, :, bbx1:bbx2, bby1:bby2] = 0  # type: ignore
+                x_aug = x_nchw
             else:
-                masks[idx, bbx1:bbx2, bby1:bby2, :] = 0  # type: ignore
-
-        x_aug = x * masks
+                # NHWC <-- NCHW
+                x_aug = x_nchw.permute(0, 2, 3, 1)
+        elif x_ndim == 5:  # lgtm [py/redundant-comparison]
+            if self.channels_first:
+                # NCFHW <-- NFCHW <-- NCHW
+                x_nfchw = x_nchw.reshape(nb_clips, clip_size, channels, height, width)
+                x_aug = x_nfchw.permute(0, 2, 1, 3, 4)
+            else:
+                # NFHWC <-- NHWC <-- NCHW
+                x_nhwc = x_nchw.permute(0, 2, 3, 1)
+                x_aug = x_nhwc.reshape(nb_clips, clip_size, height, width, channels)
 
         return x_aug, y
 
diff --git a/art/defences/preprocessor/cutout/cutout_tensorflow.py b/art/defences/preprocessor/cutout/cutout_tensorflow.py
@@ -29,8 +29,6 @@
 import logging
 from typing import Optional, Tuple, TYPE_CHECKING
 
-from tqdm.auto import trange
-
 from art.defences.preprocessor.preprocessor import PreprocessorTensorFlowV2
 
 if TYPE_CHECKING:
@@ -80,45 +78,57 @@ def forward(self, x: "tf.Tensor", y: Optional["tf.Tensor"] = None) -> Tuple["tf.
         """
         Apply Cutout data augmentation to sample `x`.
 
-        :param x: Sample to compress with shape of `NCHW` or `NHWC`. The `x` values are expected to be in
-                  the data range [0, 1] or [0, 255].
+        :param x: Sample to cut out with shape of `NCHW`, `NHWC`, `NCFHW` or `NFHWC`.
+                  `x` values are expected to be in the data range [0, 1] or [0, 255].
         :param y: Labels of the sample `x`. This function does not affect them in any way.
         :return: Data augmented sample.
         """
         import tensorflow as tf  # lgtm [py/repeated-import]
+        import tensorflow_addons as tfa
 
         x_ndim = len(x.shape)
 
+        # NCHW/NCFHW/NFHWC --> NHWC
         if x_ndim == 4:
             if self.channels_first:
-                # NCHW
-                n, _, height, width = x.shape
+                # NCHW --> NHWC
+                x_nhwc = tf.transpose(x, (0, 2, 3, 1))
+            else:
+                x_nhwc = x
+        elif x_ndim == 5:
+            if self.channels_first:
+                # NCFHW --> NFHWC --> NHWC
+                nb_clips, channels, clip_size, height, width = x.shape
+                x_nfhwc = tf.transpose(x, (0, 2, 3, 4, 1))
+                x_nhwc = tf.reshape(x_nfhwc, (nb_clips * clip_size, height, width, channels))
             else:
-                # NHWC
-                n, height, width, _ = x.shape
+                # NFHWC --> NHWC
+                nb_clips, clip_size, height, width, channels = x.shape
+                x_nhwc = tf.reshape(x, (nb_clips * clip_size, height, width, channels))
         else:
-            raise ValueError("Unrecognized input dimension. Cutout can only be applied to image data.")
+            raise ValueError("Unrecognized input dimension. Cutout can only be applied to image and video data.")
 
-        masks = tf.Variable(tf.ones_like(x), trainable=False)
+        # round down length to be divisible by 2
+        length = self.length if self.length % 2 == 0 else max(self.length - 1, 2)
 
-        # generate a random bounding box per image
-        for idx in trange(n, desc="Cutout", disable=not self.verbose):
-            # uniform sampling
-            center_y = tf.random.uniform(shape=[], maxval=height, dtype=tf.int32)  # pylint: disable=E1123
-            center_x = tf.random.uniform(shape=[], maxval=width, dtype=tf.int32)  # pylint: disable=E1123
-            bby1 = tf.clip_by_value(center_y - self.length // 2, 0, height)
-            bbx1 = tf.clip_by_value(center_x - self.length // 2, 0, width)
-            bby2 = tf.clip_by_value(center_y + self.length // 2, 0, height)
-            bbx2 = tf.clip_by_value(center_x + self.length // 2, 0, width)
+        # apply random cutout
+        x_nhwc = tfa.image.random_cutout(x_nhwc, (length, length))
 
-            # zero out the bounding box
+        # NCHW/NCFHW/NFHWC <-- NHWC
+        if x_ndim == 4:
             if self.channels_first:
-                bbox = masks[idx, :, bbx1:bbx2, bby1:bby2]
+                # NCHW <-- NHWC
+                x_aug = tf.transpose(x_nhwc, (0, 3, 1, 2))
             else:
-                bbox = masks[idx, bbx1:bbx2, bby1:bby2, :]
-            bbox.assign(tf.zeros_like(bbox))
-
-        x_aug = x * masks
+                x_aug = x_nhwc
+        elif x_ndim == 5:  # lgtm [py/redundant-comparison]
+            if self.channels_first:
+                # NCFHW <-- NFHWC <-- NHWC
+                x_nfhwc = tf.reshape(x_nhwc, (nb_clips, clip_size, height, width, channels))
+                x_aug = tf.transpose(x_nfhwc, (0, 4, 1, 2, 3))
+            else:
+                # NFHWC <-- NHWC
+                x_aug = tf.reshape(x_nhwc, (nb_clips, clip_size, height, width, channels))
 
         return x_aug, y
 
diff --git a/tests/defences/preprocessor/cutout/test_cutout.py b/tests/defences/preprocessor/cutout/test_cutout.py
@@ -38,9 +38,23 @@ def image_batch(request, channels_first):
     channels = request.param
 
     if channels_first:
-        data_shape = (2, channels, 16, 16)
+        data_shape = (2, channels, 12, 8)
     else:
-        data_shape = (2, 16, 16, channels)
+        data_shape = (2, 12, 8, channels)
+    return (255 * np.ones(data_shape)).astype(ART_NUMPY_DTYPE)
+
+
+@pytest.fixture(params=[1, 3], ids=["grayscale", "RGB"])
+def video_batch(request, channels_first):
+    """
+    Video fixtures of shape NFHWC and NCFHW.
+    """
+    channels = request.param
+
+    if channels_first:
+        data_shape = (2, 2, channels, 12, 8)
+    else:
+        data_shape = (2, 2, 12, 8, channels)
     return (255 * np.ones(data_shape)).astype(ART_NUMPY_DTYPE)
 
 
@@ -52,14 +66,14 @@ def empty_image(request, channels_first):
     channels = request.param
 
     if channels_first:
-        data_shape = (2, channels, 16, 16)
+        data_shape = (2, channels, 12, 8)
     else:
-        data_shape = (2, 16, 16, channels)
+        data_shape = (2, 12, 8, channels)
     return np.zeros(data_shape).astype(ART_NUMPY_DTYPE)
 
 
 @pytest.mark.framework_agnostic
-@pytest.mark.parametrize("length", [2, 4])
+@pytest.mark.parametrize("length", [4, 5])
 @pytest.mark.parametrize("channels_first", [True, False])
 def test_cutout_image_data(art_warning, image_batch, length, channels_first):
     try:
@@ -76,6 +90,25 @@ def test_cutout_image_data(art_warning, image_batch, length, channels_first):
         art_warning(e)
 
 
+@pytest.mark.framework_agnostic
+@pytest.mark.parametrize("length", [4])
+@pytest.mark.parametrize("channels_first", [True, False])
+def test_cutout_video_data(art_warning, video_batch, length, channels_first):
+    try:
+        cutout = Cutout(length=length, channels_first=channels_first)
+        count = np.not_equal(cutout(video_batch)[0], video_batch).sum()
+
+        n = video_batch.shape[0]
+        frames = video_batch.shape[1]
+        if channels_first:
+            channels = video_batch.shape[2]
+        else:
+            channels = video_batch.shape[-1]
+        assert count <= n * frames * channels * length * length
+    except ARTTestException as e:
+        art_warning(e)
+
+
 @pytest.mark.framework_agnostic
 @pytest.mark.parametrize("length", [4])
 @pytest.mark.parametrize("channels_first", [True])
@@ -91,9 +124,9 @@ def test_cutout_empty_data(art_warning, empty_image, length, channels_first):
 def test_non_image_data_error(art_warning, tabular_batch):
     try:
         test_input = tabular_batch
-        cutout = Cutout(length=8, channels_first=True)
+        cutout = Cutout(length=4, channels_first=True)
 
-        exc_msg = "Unrecognized input dimension. Cutout can only be applied to image data."
+        exc_msg = "Unrecognized input dimension. Cutout can only be applied to image and video data."
         with pytest.raises(ValueError, match=exc_msg):
             cutout(test_input)
     except ARTTestException as e:
diff --git a/tests/defences/preprocessor/cutout/test_cutout_pytorch.py b/tests/defences/preprocessor/cutout/test_cutout_pytorch.py
diff --git a/tests/defences/preprocessor/cutout/test_cutout_tensorflow.py b/tests/defences/preprocessor/cutout/test_cutout_tensorflow.py