Add dilate option and MultiNodeBatchNormalization to Conv2DActiv and Conv2DBNActiv #494
Changes from 19 commits
File: conv_2d_bn_activ.py
@@ -2,6 +2,13 @@
 from chainer.functions import relu
 from chainer.links import BatchNormalization
 from chainer.links import Convolution2D
+from chainer.links import DilatedConvolution2D
+
+try:
+    from chainermn.links import MultiNodeBatchNormalization
+    _chainermn_available = True
+except (ImportError, TypeError):
+    _chainermn_available = False


 class Conv2DBNActiv(chainer.Chain):

Review comment on the except clause: Does TypeError occur?
@@ -12,7 +19,10 @@ class Conv2DBNActiv(chainer.Chain):

     The arguments are the same as that of
     :class:`chainer.links.Convolution2D`
-    except for :obj:`activ` and :obj:`bn_kwargs`.
+    except for :obj:`activ`, :obj:`bn_kwargs`, and :obj:`comm`.
+    :obj:`comm` is a communicator of ChainerMN which is used for
+    :obj:`MultiNodeBatchNormalization`. If :obj:`None` is given to the argument
+    :obj:`comm`, :obj:`BatchNormalization` link from Chainer is used.
     Note that the default value for the :obj:`nobias`
     is changed to :obj:`True`.

Reply in review thread: Fixed
@@ -43,6 +53,8 @@ class Conv2DBNActiv(chainer.Chain):
            :obj:`stride=s` and :obj:`stride=(s, s)` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            :obj:`pad=p` and :obj:`pad=(p, p)` are equivalent.
+       dilate (int or pair of ints): Dilation factor of filter applications.
+           :obj:`dilate=d` and :obj:`dilate=(d, d)` are equivalent.
        nobias (bool): If :obj:`True`,
            then this link does not use the bias term.
        initialW (4-D array): Initial weight value. If :obj:`None`, the default
@@ -57,22 +69,36 @@ class Conv2DBNActiv(chainer.Chain):
            :func:`chainer.functions.relu`.
        bn_kwargs (dict): Keyword arguments passed to initialize
            :class:`chainer.links.BatchNormalization`.
+       comm (:class:`~chainermn.communicators.CommunicatorBase):
+           If a ChainerMN communicator is given,
+           :obj:`~chainermn.links.MultiNodeBatchNormalization` will be used
+           for the batch normalization. If :obj:`None`,
+           :obj:`~chainer.links.BatchNormalization` will be used.

     """

     def __init__(self, in_channels, out_channels, ksize=None,
-                 stride=1, pad=0, nobias=True, initialW=None,
-                 initial_bias=None, activ=relu, bn_kwargs=dict()):
+                 stride=1, pad=0, dilate=1, nobias=True, initialW=None,
+                 initial_bias=None, activ=relu, bn_kwargs=dict(), comm=None):
         if ksize is None:
             out_channels, ksize, in_channels = in_channels, out_channels, None

         self.activ = activ
         super(Conv2DBNActiv, self).__init__()
         with self.init_scope():
-            self.conv = Convolution2D(
-                in_channels, out_channels, ksize, stride, pad,
-                nobias, initialW, initial_bias)
-            self.bn = BatchNormalization(out_channels, **bn_kwargs)
+            if dilate > 1:
+                self.conv = DilatedConvolution2D(
+                    in_channels, out_channels, ksize, stride, pad, dilate,
+                    nobias, initialW, initial_bias)
+            else:
+                self.conv = Convolution2D(
+                    in_channels, out_channels, ksize, stride, pad,
+                    nobias, initialW, initial_bias)
+            if comm is not None and _chainermn_available:
+                self.bn = MultiNodeBatchNormalization(
+                    out_channels, comm, **bn_kwargs)
+            else:
+                self.bn = BatchNormalization(out_channels, **bn_kwargs)

     def __call__(self, x):
         h = self.conv(x)

Review comment on the comm docstring: I prefer to take ...
Reply: OK, I changed it
Review comment on the new __init__ signature: Can you remove ...
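A minimal usage sketch of the new dilate option (assuming Conv2DBNActiv is importable from chainercv.links, as elsewhere in ChainerCV; shapes are only illustrative):

    import numpy as np
    from chainercv.links import Conv2DBNActiv

    # dilate > 1 makes the link build a DilatedConvolution2D instead of Convolution2D.
    conv = Conv2DBNActiv(3, 16, ksize=3, pad=2, dilate=2)
    x = np.zeros((1, 3, 32, 32), dtype=np.float32)
    y = conv(x)
    # The effective kernel extent is 1 + (3 - 1) * 2 = 5, so pad=2 keeps the
    # spatial size: y.shape == (1, 16, 32, 32).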
File: test_conv_2d_activ.py
@@ -16,6 +16,7 @@ def _add_one(x):


 @testing.parameterize(*testing.product({
+    'dilate': [1, 2],
     'args_style': ['explicit', 'None', 'omit'],
     'activ': ['relu', 'add_one']
 }))
@@ -44,19 +45,19 @@ def setUp(self):
         if self.args_style == 'explicit':
             self.l = Conv2DActiv(
                 self.in_channels, self.out_channels, self.ksize,
-                self.stride, self.pad,
+                self.stride, self.pad, self.dilate,
                 initialW=initialW, initial_bias=initial_bias,
                 activ=activ)
         elif self.args_style == 'None':
             self.l = Conv2DActiv(
                 None, self.out_channels, self.ksize, self.stride, self.pad,
-                initialW=initialW, initial_bias=initial_bias,
+                self.dilate, initialW=initialW, initial_bias=initial_bias,
                 activ=activ)
         elif self.args_style == 'omit':
             self.l = Conv2DActiv(
                 self.out_channels, self.ksize, stride=self.stride,
-                pad=self.pad, initialW=initialW, initial_bias=initial_bias,
-                activ=activ)
+                pad=self.pad, dilate=self.dilate, initialW=initialW,
+                initial_bias=initial_bias, activ=activ)

     def check_forward(self, x_data):
         x = chainer.Variable(x_data)
@@ -65,12 +66,13 @@ def check_forward(self, x_data):
         self.assertIsInstance(y, chainer.Variable)
         self.assertIsInstance(y.array, self.l.xp.ndarray)

+        _x_data = x_data if self.dilate == 1 else x_data[:, :, 1:-1, 1:-1]
         if self.activ == 'relu':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0))
+                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(_x_data), 0))
         elif self.activ == 'add_one':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), cuda.to_cpu(x_data) + 1)
+                cuda.to_cpu(y.array), cuda.to_cpu(_x_data) + 1)

     def test_forward_cpu(self):
         self.check_forward(self.x)
@@ -83,7 +85,7 @@ def test_forward_gpu(self):
     def check_backward(self, x_data, y_grad):
         x = chainer.Variable(x_data)
         y = self.l(x)
-        y.grad = y_grad
+        y.grad = y_grad if self.dilate == 1 else y_grad[:, :, 1:-1, 1:-1]
         y.backward()

     def test_backward_cpu(self):

Review comment on the y.grad line: How about ... This is better because ...
Reply: OK, I fixed it, but there's no test case here for dilate > 2.
File: test_conv_2d_bn_activ.py
@@ -15,7 +15,15 @@ def _add_one(x):
     return x + 1


+try:
+    from chainermn import create_communicator
+    _chainermn_available = True
+except (ImportError, TypeError):
+    _chainermn_available = False
+
+
 @testing.parameterize(*testing.product({
+    'dilate': [1, 2],
     'args_style': ['explicit', 'None', 'omit'],
     'activ': ['relu', 'add_one'],
 }))

Review comment on the except clause: Remove TypeError if not necessary.
@@ -45,19 +53,19 @@ def setUp(self):
         if self.args_style == 'explicit':
             self.l = Conv2DBNActiv(
                 self.in_channels, self.out_channels, self.ksize,
-                self.stride, self.pad,
+                self.stride, self.pad, self.dilate,
                 initialW=initialW, initial_bias=initial_bias,
                 activ=activ, bn_kwargs=bn_kwargs)
         elif self.args_style == 'None':
             self.l = Conv2DBNActiv(
                 None, self.out_channels, self.ksize, self.stride, self.pad,
-                initialW=initialW, initial_bias=initial_bias,
+                self.dilate, initialW=initialW, initial_bias=initial_bias,
                 activ=activ, bn_kwargs=bn_kwargs)
         elif self.args_style == 'omit':
             self.l = Conv2DBNActiv(
                 self.out_channels, self.ksize, stride=self.stride,
-                pad=self.pad, initialW=initialW, initial_bias=initial_bias,
-                activ=activ, bn_kwargs=bn_kwargs)
+                pad=self.pad, dilate=self.dilate, initialW=initialW,
+                initial_bias=initial_bias, activ=activ, bn_kwargs=bn_kwargs)

     def check_forward(self, x_data):
         x = chainer.Variable(x_data)
@@ -70,14 +78,15 @@ def check_forward(self, x_data):
         self.assertIsInstance(y, chainer.Variable)
         self.assertIsInstance(y.array, self.l.xp.ndarray)

+        _x_data = x_data if self.dilate == 1 else x_data[:, :, 1:-1, 1:-1]
         if self.activ == 'relu':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0),
+                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(_x_data), 0),
                 decimal=4
             )
         elif self.activ == 'add_one':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), cuda.to_cpu(x_data) + 1,
+                cuda.to_cpu(y.array), cuda.to_cpu(_x_data) + 1,
                 decimal=4
             )

Review comment on the _x_data line: ditto
@@ -92,7 +101,7 @@ def test_forward_gpu(self):
     def check_backward(self, x_data, y_grad):
         x = chainer.Variable(x_data)
         y = self.l(x)
-        y.grad = y_grad
+        y.grad = y_grad if self.dilate == 1 else y_grad[:, :, 1:-1, 1:-1]
         y.backward()

     def test_backward_cpu(self):

Review comment on the y.grad line: ditto
@@ -104,4 +113,72 @@ def test_backward_gpu(self):
         self.check_backward(cuda.to_gpu(self.x), cuda.to_gpu(self.gy))


+@unittest.skipIf(not _chainermn_available, 'ChainerMN is not installed')
+class TestConv2DMultiNodeBNActiv(unittest.TestCase):
+
+    in_channels = 1
+    out_channels = 1
+    ksize = 3
+    stride = 1
+    pad = 1
+    dilate = 1
+
+    def setUp(self):
+        self.x = np.random.uniform(
+            -1, 1, (5, self.in_channels, 5, 5)).astype(np.float32)
+        self.gy = np.random.uniform(
+            -1, 1, (5, self.out_channels, 5, 5)).astype(np.float32)
+
+        # Convolution is the identity function.
+        initialW = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]],
+                            dtype=np.float32).reshape((1, 1, 3, 3))
+        bn_kwargs = {'decay': 0.8}
+        initial_bias = 0
+        comm = create_communicator('naive')
+        activ = relu
+        self.l = Conv2DBNActiv(
+            self.in_channels, self.out_channels, self.ksize, self.stride,
+            self.pad, self.dilate, initialW=initialW,
+            initial_bias=initial_bias, activ=activ, bn_kwargs=bn_kwargs,
+            comm=comm)
+
+    def check_forward(self, x_data):
+        x = chainer.Variable(x_data)
+        # Make the batch normalization to be the identity function.
+        self.l.bn.avg_var[:] = 1
+        self.l.bn.avg_mean[:] = 0
+        with chainer.using_config('train', False):
+            y = self.l(x)
+
+        self.assertIsInstance(y, chainer.Variable)
+        self.assertIsInstance(y.array, self.l.xp.ndarray)
+
+        np.testing.assert_almost_equal(
+            cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0),
+            decimal=4
+        )
+
+    def test_multi_node_bach_normalization_forward_cpu(self):
+        self.check_forward(self.x)
+
+    @attr.gpu
+    def test_multi_node_bach_normalization_forward_gpu(self):
+        self.l.to_gpu()
+        self.check_forward(cuda.to_gpu(self.x))
+
+    def check_backward(self, x_data, y_grad):
+        x = chainer.Variable(x_data)
+        y = self.l(x)
+        y.grad = y_grad
+        y.backward()
+
+    def test_multi_node_bach_normalization_backward_cpu(self):
+        self.check_backward(self.x, self.gy)
+
+    @attr.gpu
+    def test_multi_node_bach_normalization_backward_gpu(self):
+        self.l.to_gpu()
+        self.check_backward(cuda.to_gpu(self.x), cuda.to_gpu(self.gy))
+
+
 testing.run_module(__name__, __file__)

Review comment on the Conv2DBNActiv construction in setUp: Include ...
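The comm branch that the new test exercises can also be checked directly. A short sketch, assuming ChainerMN (and mpi4py) is installed and that Conv2DBNActiv is importable from chainercv.links:

    from chainer.links import BatchNormalization
    from chainercv.links import Conv2DBNActiv
    from chainermn import create_communicator
    from chainermn.links import MultiNodeBatchNormalization

    # Without a communicator the link falls back to plain BatchNormalization.
    plain = Conv2DBNActiv(1, 1, ksize=3, pad=1)
    assert isinstance(plain.bn, BatchNormalization)

    # With a communicator (the single-process 'naive' one used in the test is
    # enough for this check), MultiNodeBatchNormalization is used instead.
    comm = create_communicator('naive')
    multi = Conv2DBNActiv(1, 1, ksize=3, pad=1, comm=comm)
    assert isinstance(multi.bn, MultiNodeBatchNormalization)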
Review comment: Please update to the latest release.