Add dilate option and MultiNodeBatchNormalization to Conv2DActiv and Conv2DBNActiv #494
Changes from 19 commits
File: conv_2d_bn_activ.py
@@ -2,6 +2,13 @@
 from chainer.functions import relu
 from chainer.links import BatchNormalization
 from chainer.links import Convolution2D
+from chainer.links import DilatedConvolution2D
+
+try:
+    from chainermn.links import MultiNodeBatchNormalization
+    _chainermn_available = True
+except (ImportError, TypeError):
+    _chainermn_available = False


 class Conv2DBNActiv(chainer.Chain):

Review comment on the except clause: Does TypeError occur?
@@ -12,7 +19,10 @@ class Conv2DBNActiv(chainer.Chain):

     The arguments are the same as that of
     :class:`chainer.links.Convolution2D`
-    except for :obj:`activ` and :obj:`bn_kwargs`.
+    except for :obj:`activ`, :obj:`bn_kwargs`, and :obj:`comm`.
+    :obj:`comm` is a communicator of ChainerMN which is used for
+    :obj:`MultiNodeBatchNormalization`. If :obj:`None` is given to the argument
+    :obj:`comm`, :obj:`BatchNormalization` link from Chainer is used.
     Note that the default value for the :obj:`nobias`
     is changed to :obj:`True`.

Reply in review thread: Fixed
@@ -43,6 +53,8 @@ class Conv2DBNActiv(chainer.Chain):
            :obj:`stride=s` and :obj:`stride=(s, s)` are equivalent.
        pad (int or pair of ints): Spatial padding width for input arrays.
            :obj:`pad=p` and :obj:`pad=(p, p)` are equivalent.
+       dilate (int or pair of ints): Dilation factor of filter applications.
+           :obj:`dilate=d` and :obj:`dilate=(d, d)` are equivalent.
        nobias (bool): If :obj:`True`,
            then this link does not use the bias term.
        initialW (4-D array): Initial weight value. If :obj:`None`, the default
@@ -57,22 +69,36 @@ class Conv2DBNActiv(chainer.Chain):
            :func:`chainer.functions.relu`.
        bn_kwargs (dict): Keyword arguments passed to initialize
            :class:`chainer.links.BatchNormalization`.
+       comm (:class:`~chainermn.communicators.CommunicatorBase):
+           If a ChainerMN communicator is given,
+           :obj:`~chainermn.links.MultiNodeBatchNormalization` will be used
+           for the batch normalization. If :obj:`None`,
+           :obj:`~chainer.links.BatchNormalization` will be used.

     """

     def __init__(self, in_channels, out_channels, ksize=None,
-                 stride=1, pad=0, nobias=True, initialW=None,
-                 initial_bias=None, activ=relu, bn_kwargs=dict()):
+                 stride=1, pad=0, dilate=1, nobias=True, initialW=None,
+                 initial_bias=None, activ=relu, bn_kwargs=dict(), comm=None):
         if ksize is None:
             out_channels, ksize, in_channels = in_channels, out_channels, None

         self.activ = activ
         super(Conv2DBNActiv, self).__init__()
         with self.init_scope():
-            self.conv = Convolution2D(
-                in_channels, out_channels, ksize, stride, pad,
-                nobias, initialW, initial_bias)
-            self.bn = BatchNormalization(out_channels, **bn_kwargs)
+            if dilate > 1:
+                self.conv = DilatedConvolution2D(
+                    in_channels, out_channels, ksize, stride, pad, dilate,
+                    nobias, initialW, initial_bias)
+            else:
+                self.conv = Convolution2D(
+                    in_channels, out_channels, ksize, stride, pad,
+                    nobias, initialW, initial_bias)
+            if comm is not None and _chainermn_available:
+                self.bn = MultiNodeBatchNormalization(
+                    out_channels, comm, **bn_kwargs)
+            else:
+                self.bn = BatchNormalization(out_channels, **bn_kwargs)

     def __call__(self, x):
         h = self.conv(x)

Review comment on the comm docstring: I prefer to take ...
Reply: OK, I changed it
Review comment on the new __init__ signature: Can you remove ...
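A minimal usage sketch of the new dilate option (assuming Conv2DBNActiv is importable from chainercv.links, as elsewhere in ChainerCV; shapes are only illustrative):

    import numpy as np
    from chainercv.links import Conv2DBNActiv

    # dilate > 1 makes the link build a DilatedConvolution2D instead of Convolution2D.
    conv = Conv2DBNActiv(3, 16, ksize=3, pad=2, dilate=2)
    x = np.zeros((1, 3, 32, 32), dtype=np.float32)
    y = conv(x)
    # The effective kernel extent is 1 + (3 - 1) * 2 = 5, so pad=2 keeps the
    # spatial size: y.shape == (1, 16, 32, 32).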
File: test_conv_2d_activ.py
@@ -16,6 +16,7 @@ def _add_one(x):


 @testing.parameterize(*testing.product({
+    'dilate': [1, 2],
     'args_style': ['explicit', 'None', 'omit'],
     'activ': ['relu', 'add_one']
 }))
@@ -44,19 +45,19 @@ def setUp(self):
         if self.args_style == 'explicit':
             self.l = Conv2DActiv(
                 self.in_channels, self.out_channels, self.ksize,
-                self.stride, self.pad,
+                self.stride, self.pad, self.dilate,
                 initialW=initialW, initial_bias=initial_bias,
                 activ=activ)
         elif self.args_style == 'None':
             self.l = Conv2DActiv(
                 None, self.out_channels, self.ksize, self.stride, self.pad,
-                initialW=initialW, initial_bias=initial_bias,
+                self.dilate, initialW=initialW, initial_bias=initial_bias,
                 activ=activ)
         elif self.args_style == 'omit':
             self.l = Conv2DActiv(
                 self.out_channels, self.ksize, stride=self.stride,
-                pad=self.pad, initialW=initialW, initial_bias=initial_bias,
-                activ=activ)
+                pad=self.pad, dilate=self.dilate, initialW=initialW,
+                initial_bias=initial_bias, activ=activ)

     def check_forward(self, x_data):
         x = chainer.Variable(x_data)
@@ -65,12 +66,13 @@ def check_forward(self, x_data):
         self.assertIsInstance(y, chainer.Variable)
         self.assertIsInstance(y.array, self.l.xp.ndarray)

+        _x_data = x_data if self.dilate == 1 else x_data[:, :, 1:-1, 1:-1]
         if self.activ == 'relu':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0))
+                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(_x_data), 0))
         elif self.activ == 'add_one':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), cuda.to_cpu(x_data) + 1)
+                cuda.to_cpu(y.array), cuda.to_cpu(_x_data) + 1)

     def test_forward_cpu(self):
         self.check_forward(self.x)
@@ -83,7 +85,7 @@ def test_forward_gpu(self):
     def check_backward(self, x_data, y_grad):
         x = chainer.Variable(x_data)
         y = self.l(x)
-        y.grad = y_grad
+        y.grad = y_grad if self.dilate == 1 else y_grad[:, :, 1:-1, 1:-1]
         y.backward()

     def test_backward_cpu(self):

Review comment on the y.grad line: How about ... This is better because ...
Reply: OK, I fixed it, but there's no test case here for dilate > 2.
File: test_conv_2d_bn_activ.py
@@ -15,7 +15,15 @@ def _add_one(x):
     return x + 1


+try:
+    from chainermn import create_communicator
+    _chainermn_available = True
+except (ImportError, TypeError):
+    _chainermn_available = False
+
+
 @testing.parameterize(*testing.product({
+    'dilate': [1, 2],
     'args_style': ['explicit', 'None', 'omit'],
     'activ': ['relu', 'add_one'],
 }))

Review comment on the except clause: Remove TypeError if not necessary.
@@ -45,19 +53,19 @@ def setUp(self):
         if self.args_style == 'explicit':
             self.l = Conv2DBNActiv(
                 self.in_channels, self.out_channels, self.ksize,
-                self.stride, self.pad,
+                self.stride, self.pad, self.dilate,
                 initialW=initialW, initial_bias=initial_bias,
                 activ=activ, bn_kwargs=bn_kwargs)
         elif self.args_style == 'None':
             self.l = Conv2DBNActiv(
                 None, self.out_channels, self.ksize, self.stride, self.pad,
-                initialW=initialW, initial_bias=initial_bias,
+                self.dilate, initialW=initialW, initial_bias=initial_bias,
                 activ=activ, bn_kwargs=bn_kwargs)
         elif self.args_style == 'omit':
             self.l = Conv2DBNActiv(
                 self.out_channels, self.ksize, stride=self.stride,
-                pad=self.pad, initialW=initialW, initial_bias=initial_bias,
-                activ=activ, bn_kwargs=bn_kwargs)
+                pad=self.pad, dilate=self.dilate, initialW=initialW,
+                initial_bias=initial_bias, activ=activ, bn_kwargs=bn_kwargs)

     def check_forward(self, x_data):
         x = chainer.Variable(x_data)
@@ -70,14 +78,15 @@ def check_forward(self, x_data):
         self.assertIsInstance(y, chainer.Variable)
         self.assertIsInstance(y.array, self.l.xp.ndarray)

+        _x_data = x_data if self.dilate == 1 else x_data[:, :, 1:-1, 1:-1]
         if self.activ == 'relu':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0),
+                cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(_x_data), 0),
                 decimal=4
             )
         elif self.activ == 'add_one':
             np.testing.assert_almost_equal(
-                cuda.to_cpu(y.array), cuda.to_cpu(x_data) + 1,
+                cuda.to_cpu(y.array), cuda.to_cpu(_x_data) + 1,
                 decimal=4
             )

Review comment on the _x_data line: ditto
@@ -92,7 +101,7 @@ def test_forward_gpu(self):
     def check_backward(self, x_data, y_grad):
         x = chainer.Variable(x_data)
         y = self.l(x)
-        y.grad = y_grad
+        y.grad = y_grad if self.dilate == 1 else y_grad[:, :, 1:-1, 1:-1]
         y.backward()

     def test_backward_cpu(self):

Review comment on the y.grad line: ditto
@@ -104,4 +113,72 @@ def test_backward_gpu(self):
         self.check_backward(cuda.to_gpu(self.x), cuda.to_gpu(self.gy))


+@unittest.skipIf(not _chainermn_available, 'ChainerMN is not installed')
+class TestConv2DMultiNodeBNActiv(unittest.TestCase):
+
+    in_channels = 1
+    out_channels = 1
+    ksize = 3
+    stride = 1
+    pad = 1
+    dilate = 1
+
+    def setUp(self):
+        self.x = np.random.uniform(
+            -1, 1, (5, self.in_channels, 5, 5)).astype(np.float32)
+        self.gy = np.random.uniform(
+            -1, 1, (5, self.out_channels, 5, 5)).astype(np.float32)
+
+        # Convolution is the identity function.
+        initialW = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]],
+                            dtype=np.float32).reshape((1, 1, 3, 3))
+        bn_kwargs = {'decay': 0.8}
+        initial_bias = 0
+        comm = create_communicator('naive')
+        activ = relu
+        self.l = Conv2DBNActiv(
+            self.in_channels, self.out_channels, self.ksize, self.stride,
+            self.pad, self.dilate, initialW=initialW,
+            initial_bias=initial_bias, activ=activ, bn_kwargs=bn_kwargs,
+            comm=comm)
+
+    def check_forward(self, x_data):
+        x = chainer.Variable(x_data)
+        # Make the batch normalization to be the identity function.
+        self.l.bn.avg_var[:] = 1
+        self.l.bn.avg_mean[:] = 0
+        with chainer.using_config('train', False):
+            y = self.l(x)
+
+        self.assertIsInstance(y, chainer.Variable)
+        self.assertIsInstance(y.array, self.l.xp.ndarray)
+
+        np.testing.assert_almost_equal(
+            cuda.to_cpu(y.array), np.maximum(cuda.to_cpu(x_data), 0),
+            decimal=4
+        )
+
+    def test_multi_node_bach_normalization_forward_cpu(self):
+        self.check_forward(self.x)
+
+    @attr.gpu
+    def test_multi_node_bach_normalization_forward_gpu(self):
+        self.l.to_gpu()
+        self.check_forward(cuda.to_gpu(self.x))
+
+    def check_backward(self, x_data, y_grad):
+        x = chainer.Variable(x_data)
+        y = self.l(x)
+        y.grad = y_grad
+        y.backward()
+
+    def test_multi_node_bach_normalization_backward_cpu(self):
+        self.check_backward(self.x, self.gy)
+
+    @attr.gpu
+    def test_multi_node_bach_normalization_backward_gpu(self):
+        self.l.to_gpu()
+        self.check_backward(cuda.to_gpu(self.x), cuda.to_gpu(self.gy))
+
+
 testing.run_module(__name__, __file__)

Review comment on the Conv2DBNActiv construction in setUp: Include ...
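The comm branch that the new test exercises can also be checked directly. A short sketch, assuming ChainerMN (and mpi4py) is installed and that Conv2DBNActiv is importable from chainercv.links:

    from chainer.links import BatchNormalization
    from chainercv.links import Conv2DBNActiv
    from chainermn import create_communicator
    from chainermn.links import MultiNodeBatchNormalization

    # Without a communicator the link falls back to plain BatchNormalization.
    plain = Conv2DBNActiv(1, 1, ksize=3, pad=1)
    assert isinstance(plain.bn, BatchNormalization)

    # With a communicator (the single-process 'naive' one used in the test is
    # enough for this check), MultiNodeBatchNormalization is used instead.
    comm = create_communicator('naive')
    multi = Conv2DBNActiv(1, 1, ksize=3, pad=1, comm=comm)
    assert isinstance(multi.bn, MultiNodeBatchNormalization)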
Review comment: Please update to the latest release.