Skip to content
This repository was archived by the owner on Jul 2, 2021. It is now read-only.

Commit bceb057

Browse files
committed
misc
1 parent e468545 commit bceb057

File tree

8 files changed

+390
-221
lines changed

8 files changed

+390
-221
lines changed

chainercv/links/model/mask_rcnn/mask_head.py

+118-14
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@
1818

1919
class MaskHead(chainer.Chain):
2020

21+
"""Mask Head network of Mask R-CNN.
22+
23+
Args:
24+
n_class (int): The number of classes including background.
25+
scales (tuple of floats): The scales of feature maps.
26+
27+
"""
28+
2129
_canonical_level = 2
2230
_canonical_scale = 224
2331
_roi_size = 14
@@ -67,6 +75,30 @@ def __call__(self, hs, rois, roi_indices):
6775
return self.seg(h)
6876

6977
def distribute(self, rois, roi_indices):
78+
"""Assigns feature levels to RoIs based on their size.
79+
80+
Args:
81+
rois (array): An array of shape :math:`(R, 4)`, \
82+
where :math:`R` is the total number of RoIs in the given batch.
83+
roi_indices (array): An array of shape :math:`(R,)`.
84+
85+
Returns:
86+
two lists and one array:
87+
:obj:`out_rois`, :obj:`out_roi_indices` and :obj:`order`.
88+
89+
* **out_rois**: A list of arrays of shape :math:`(R_l, 4)`, \
90+
where :math:`R_l` is the number of RoIs in the :math:`l`-th \
91+
feature map.
92+
* **out_roi_indices** : A list of arrays of shape :math:`(R_l,)`.
93+
* **order**: A correspondence between the output and the input. \
94+
The relationship below is satisfied.
95+
96+
.. code:: python
97+
98+
xp.concatenate(out_rois, axis=0)[order[i]] == rois[i]
99+
100+
"""
101+
70102
size = self.xp.sqrt(self.xp.prod(rois[:, 2:] - rois[:, :2], axis=1))
71103
level = self.xp.floor(self.xp.log2(
72104
size / self._canonical_scale + 1e-6)).astype(np.int32)
@@ -75,18 +107,39 @@ def distribute(self, rois, roi_indices):
75107
level + self._canonical_level, 0, len(self._scales) - 2)
76108

77109
masks = [level == l for l in range(len(self._scales))]
78-
rois = [rois[mask] for mask in masks]
79-
roi_indices = [roi_indices[mask] for mask in masks]
110+
out_rois = [rois[mask] for mask in masks]
111+
out_roi_indices = [roi_indices[mask] for mask in masks]
80112
order = self.xp.argsort(
81113
self.xp.concatenate([self.xp.where(mask)[0] for mask in masks]))
82-
return rois, roi_indices, order
114+
return out_rois, out_roi_indices, order
83115

84116
def decode(self, segms, bboxes, labels, sizes):
85-
# CPU is used because cv2.resize only accepts numpy arrays.
86-
segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
87-
bboxes = [chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes]
88-
labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
89-
117+
"""Decodes back to masks.
118+
119+
Args:
120+
segms (iterable of arrays): An iterable of arrays of
121+
shape :math:`(R_n, n\_class, M, M)`.
122+
bboxes (iterable of arrays): An iterable of arrays of
123+
shape :math:`(R_n, 4)`.
124+
labels (iterable of arrays): An iterable of arrays of
125+
shape :math:`(R_n,)`.
126+
sizes (list of tuples of two ints): A list of
127+
:math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n`
128+
are height and width of the :math:`n`-th image.
129+
130+
Returns:
131+
list of arrays:
132+
This list contains instance segmentation for each image
133+
in the batch.
134+
More precisely, this is a list of boolean arrays of shape
135+
:math:`(R'_n, H_n, W_n)`, where :math:`R'_n` is the number of
136+
bounding boxes in the :math:`n`-th image.
137+
"""
138+
139+
xp = chainer.backends.cuda.get_array_module(*segms)
140+
if xp != np:
141+
raise ValueError(
142+
'MaskHead.decode only supports numpy inputs for now.')
90143
masks = []
91144
# To work around an issue with cv2.resize (it seems to automatically
92145
# pad with repeated border values), we manually zero-pad the masks by 1
@@ -101,7 +154,7 @@ def decode(self, segms, bboxes, labels, sizes):
101154
img_H, img_W = size
102155
mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool)
103156

104-
bbox = expand_boxes(bbox, cv2_expand_scale)
157+
bbox = _expand_boxes(bbox, cv2_expand_scale)
105158
for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)):
106159
bb = bb.astype(np.int32)
107160
padded_mask[1:-1, 1:-1] = sgm[lbl + 1]
@@ -124,7 +177,7 @@ def decode(self, segms, bboxes, labels, sizes):
124177
return masks
125178

126179

127-
def expand_boxes(bbox, scale):
180+
def _expand_boxes(bbox, scale):
128181
"""Expand an array of boxes by a given scale."""
129182
xp = chainer.backends.cuda.get_array_module(bbox)
130183

@@ -147,6 +200,42 @@ def expand_boxes(bbox, scale):
147200

148201
def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
149202
mask_size):
203+
"""Loss function for Mask Head (pre).
204+
205+
This function processes RoIs for :func:`mask_loss_post` by
206+
selecting RoIs for mask loss calculation and
207+
preparing ground truth network output.
208+
209+
Args:
210+
rois (iterable of arrays): An iterable of arrays of
211+
shape :math:`(R_l, 4)`, where :math:`R_l` is the number
212+
of RoIs in the :math:`l`-th feature map.
213+
roi_indices (iterable of arrays): An iterable of arrays of
214+
shape :math:`(R_l,)`.
215+
gt_masks (iterable of arrays): A list of arrays whose shape is
216+
:math:`(R_n, H, W)`, where :math:`R_n` is the number of
217+
ground truth objects.
218+
gt_head_labels (iterable of arrays): An iterable of arrays of
219+
shape :math:`(R_l,)`. This is a collection of ground-truth
220+
labels assigned to :obj:`rois` during bounding box localization
221+
stage. The range of value is :math:`(0, n\_class - 1)`.
222+
mask_size (int): Size of the ground truth network output.
223+
224+
Returns:
225+
tuple of four lists:
226+
:obj:`mask_rois`, :obj:`mask_roi_indices`,
227+
:obj:`gt_segms`, and :obj:`gt_mask_labels`.
228+
229+
* **mask_rois**: A list of arrays of shape :math:`(R'_l, 4)`, \
230+
where :math:`R'_l` is the number of RoIs in the :math:`l`-th \
231+
feature map.
232+
* **mask_roi_indices**: A list of arrays of shape :math:`(R'_l,)`.
233+
* **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M)`. \
234+
:math:`M` is the argument :obj:`mask_size`.
235+
* **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \
236+
indicating the classes of ground truth.
237+
"""
238+
150239
xp = cuda.get_array_module(*rois)
151240

152241
n_level = len(rois)
@@ -172,7 +261,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
172261
mask_roi = mask_rois[index]
173262
iou = bbox_iou(mask_roi, gt_bbox)
174263
gt_index = iou.argmax(axis=1)
175-
gt_segms[index] = segm_wrt_bbox(
264+
gt_segms[index] = _segm_wrt_bbox(
176265
gt_mask[gt_index], mask_roi, (mask_size, mask_size))
177266

178267
flag_masks = [mask_roi_levels == l for l in range(n_level)]
@@ -185,8 +274,23 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
185274

186275
def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
187276
batchsize):
188-
# Just compute loss for the foreground class
189-
# divide by the batchsize
277+
"""Loss function for Mask Head (post).
278+
279+
Args:
280+
segms (array): An array whose shape is :math:`(R, n\_class, M, M)`,
281+
where :math:`R` is the total number of RoIs in the given batch.
282+
mask_roi_indices (list of arrays): A list of arrays returned by
283+
:func:`mask_loss_pre`.
284+
gt_segms (list of arrays): A list of arrays returned by
285+
:func:`mask_loss_pre`.
286+
gt_mask_labels (list of arrays): A list of arrays returned by
287+
:func:`mask_loss_pre`.
288+
batchsize (int): The size of batch.
289+
290+
Returns:
291+
chainer.Variable:
292+
Mask loss.
293+
"""
190294
xp = cuda.get_array_module(segms.array)
191295

192296
mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32)
@@ -206,7 +310,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
206310
return mask_loss
207311

208312

209-
def segm_wrt_bbox(mask, bbox, size):
313+
def _segm_wrt_bbox(mask, bbox, size):
210314
xp = chainer.backends.cuda.get_array_module(mask)
211315

212316
bbox = bbox.astype(np.int32)

chainercv/links/model/mask_rcnn/mask_rcnn.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import division
22

33
import numpy as np
4-
import PIL
54

65
import chainer
76
from chainer.backends import cuda
@@ -153,13 +152,13 @@ def predict(self, imgs):
153152
dtype=np.float32)
154153
for segm in segms]
155154

155+
segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
156+
bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
157+
for bbox, scale in zip(rescaled_bboxes, scales)]
158+
labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
156159
masks = self.mask_head.decode(
157-
segms,
158-
[bbox / scale for bbox, scale in zip(rescaled_bboxes, scales)],
159-
labels, sizes)
160+
segms, bboxes, labels, sizes)
160161

161-
masks = [cuda.to_cpu(mask) for mask in masks]
162-
labels = [cuda.to_cpu(label) for label in labels]
163162
scores = [cuda.to_cpu(score) for score in scores]
164163
return masks, labels, scores
165164

@@ -172,8 +171,9 @@ def prepare(self, imgs, masks=None):
172171
and the range of their value is :math:`[0, 255]`.
173172
174173
Returns:
175-
Two arrays: preprocessed images and \
176-
scales that were caluclated in prepocessing.
174+
Three arrays: preprocessed images, \
175+
scales that were calculated in preprocessing and
176+
the size of the images after resizing.
177177
178178
"""
179179
scales = []

chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py

+65
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717

1818
class MaskRCNNFPNResNet(MaskRCNN):
1919

20+
"""Base class for Mask R-CNN with ResNet backbone.
21+
22+
A subclass of this class should have :obj:`_base` and :obj:`_models`.
23+
"""
24+
2025
def __init__(self, n_fg_class=None, pretrained_model=None):
2126
param, path = utils.prepare_pretrained_model(
2227
{'n_fg_class': n_fg_class}, pretrained_model, self._models)
@@ -46,6 +51,36 @@ def __init__(self, n_fg_class=None, pretrained_model=None):
4651

4752
class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
4853

54+
"""Mask R-CNN with ResNet-50.
55+
56+
This is a model of Mask R-CNN [#]_.
57+
This model uses :class:`~chainercv.links.ResNet50` as
58+
its base feature extractor.
59+
60+
.. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
61+
62+
Args:
63+
n_fg_class (int): The number of classes excluding the background.
64+
pretrained_model (string): The weight file to be loaded.
65+
This can take :obj:`'coco'`, :obj:`'imagenet'`, `filepath` or :obj:`None`.
66+
The default value is :obj:`None`.
67+
68+
* :obj:`'coco'`: Load weights trained on train split of \
69+
MS COCO 2017. \
70+
The weight file is downloaded and cached automatically. \
71+
:obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
72+
* :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
73+
ImageNet. \
74+
The weight file is downloaded and cached automatically. \
75+
This option initializes weights partially and the rest are \
76+
initialized randomly. In this case, :obj:`n_fg_class` \
77+
can be set to any number.
78+
* `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
79+
must be specified properly.
80+
* :obj:`None`: Do not load weights.
81+
82+
"""
83+
4984
_base = ResNet50
5085
_models = {
5186
'coco': {
@@ -58,6 +93,36 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
5893

5994
class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
6095

96+
"""Mask R-CNN with ResNet-101.
97+
98+
This is a model of Mask R-CNN [#]_.
99+
This model uses :class:`~chainercv.links.ResNet101` as
100+
its base feature extractor.
101+
102+
.. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
103+
104+
Args:
105+
n_fg_class (int): The number of classes excluding the background.
106+
pretrained_model (string): The weight file to be loaded.
107+
This can take :obj:`'coco'`, :obj:`'imagenet'`, `filepath` or :obj:`None`.
108+
The default value is :obj:`None`.
109+
110+
* :obj:`'coco'`: Load weights trained on train split of \
111+
MS COCO 2017. \
112+
The weight file is downloaded and cached automatically. \
113+
:obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
114+
* :obj:`'imagenet'`: Load weights of ResNet-101 trained on \
115+
ImageNet. \
116+
The weight file is downloaded and cached automatically. \
117+
This option initializes weights partially and the rest are \
118+
initialized randomly. In this case, :obj:`n_fg_class` \
119+
can be set to any number.
120+
* `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
121+
must be specified properly.
122+
* :obj:`None`: Do not load weights.
123+
124+
"""
125+
61126
_base = ResNet101
62127
_models = {
63128
'coco': {

examples/mask_rcnn/demo.py

+8-13
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,22 @@
1414
def main():
1515
parser = argparse.ArgumentParser()
1616
parser.add_argument('--gpu', type=int, default=-1)
17-
parser.add_argument('--model', choices=('resnet50', 'resnet101'))
17+
parser.add_argument(
18+
'--model',
19+
choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
20+
default='mask_rcnn_fpn_resnet50'
21+
)
1822
group = parser.add_mutually_exclusive_group()
1923
group.add_argument('--pretrained-model')
2024
group.add_argument('--snapshot')
2125
parser.add_argument('image')
2226
args = parser.parse_args()
2327

24-
if args.model == 'resnet50':
28+
if args.model == 'mask_rcnn_fpn_resnet50':
2529
model = MaskRCNNFPNResNet50(
2630
n_fg_class=len(coco_instance_segmentation_label_names),
2731
pretrained_model=args.pretrained_model)
28-
elif args.model == 'resnet101':
32+
elif args.model == 'mask_rcnn_fpn_resnet101':
2933
model = MaskRCNNFPNResNet101(
3034
n_fg_class=len(coco_instance_segmentation_label_names),
3135
pretrained_model=args.pretrained_model)
@@ -35,21 +39,12 @@ def main():
3539
model.to_gpu()
3640

3741
img = utils.read_image(args.image)
38-
# bboxes, masks, labels, scores = model.predict([img])
3942
masks, labels, scores = model.predict([img])
40-
# bbox = bboxes[0]
4143
mask = masks[0]
4244
label = labels[0]
4345
score = scores[0]
44-
45-
# chainercv.visualizations.vis_bbox(
46-
# img, bbox, label, score, label_names=coco_bbox_label_names)
47-
48-
import numpy as np
49-
# flag = np.array([bb[3] - bb[1] < 300 for bb in bbox], dtype=np.bool)
50-
flag = np.ones(len(mask), dtype=np.bool)
5146
chainercv.visualizations.vis_instance_segmentation(
52-
img, mask[flag], label[flag], score[flag],
47+
img, mask, label, score,
5348
label_names=coco_instance_segmentation_label_names)
5449
plt.show()
5550

examples/mask_rcnn/train_multi.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,9 @@ def copyparams(dst, src):
143143
def main():
144144
parser = argparse.ArgumentParser()
145145
parser.add_argument(
146-
'--model', choices=('resnet50', 'resnet101'),
147-
default='resnet50')
146+
'--model',
147+
choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
148+
default='mask_rcnn_fpn_resnet50')
148149
parser.add_argument('--batchsize', type=int, default=16)
149150
parser.add_argument('--iteration', type=int, default=90000)
150151
parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
@@ -163,11 +164,11 @@ def main():
163164
comm = chainermn.create_communicator(args.communicator)
164165
device = comm.intra_rank
165166

166-
if args.model == 'resnet50':
167+
if args.model == 'mask_rcnn_fpn_resnet50':
167168
model = MaskRCNNFPNResNet50(
168169
n_fg_class=len(coco_instance_segmentation_label_names),
169170
pretrained_model='imagenet')
170-
elif args.model == 'resnet101':
171+
elif args.model == 'mask_rcnn_fpn_resnet101':
171172
model = MaskRCNNFPNResNet101(
172173
n_fg_class=len(coco_instance_segmentation_label_names),
173174
pretrained_model='imagenet')

0 commit comments

Comments
 (0)