18
18
19
19
class MaskHead (chainer .Chain ):
20
20
21
+ """Mask Head network of Mask R-CNN.
22
+
23
+ Args:
24
+ n_class (int): The number of classes including background.
25
+ scales (tuple of floats): The scales of feature maps.
26
+
27
+ """
28
+
21
29
_canonical_level = 2
22
30
_canonical_scale = 224
23
31
_roi_size = 14
@@ -67,6 +75,30 @@ def __call__(self, hs, rois, roi_indices):
67
75
return self .seg (h )
68
76
69
77
def distribute (self , rois , roi_indices ):
78
+ """Assigns feature levels to RoIs based on their size.
79
+
80
+ Args:
81
+ rois (array): An array of shape :math:`(R, 4)`, \
82
+ where :math:`R` is the total number of RoIs in the given batch.
83
+ roi_indices (array): An array of shape :math:`(R,)`.
84
+
85
+ Returns:
86
+ two lists and one array:
87
+ :obj:`out_rois`, :obj:`out_roi_indices` and :obj:`order`.
88
+
89
+ * **out_rois**: A list of arrays of shape :math:`(R_l, 4)`, \
90
+ where :math:`R_l` is the number of RoIs in the :math:`l`-th \
91
+ feature map.
92
+ * **out_roi_indices** : A list of arrays of shape :math:`(R_l,)`.
93
+ * **order**: A correspondence between the output and the input. \
94
+ The relationship below is satisfied.
95
+
96
+ .. code:: python
97
+
98
+ xp.concatenate(out_rois, axis=0)[order[i]] == rois[i]
99
+
100
+ """
101
+
70
102
size = self .xp .sqrt (self .xp .prod (rois [:, 2 :] - rois [:, :2 ], axis = 1 ))
71
103
level = self .xp .floor (self .xp .log2 (
72
104
size / self ._canonical_scale + 1e-6 )).astype (np .int32 )
@@ -75,18 +107,39 @@ def distribute(self, rois, roi_indices):
75
107
level + self ._canonical_level , 0 , len (self ._scales ) - 2 )
76
108
77
109
masks = [level == l for l in range (len (self ._scales ))]
78
- rois = [rois [mask ] for mask in masks ]
79
- roi_indices = [roi_indices [mask ] for mask in masks ]
110
+ out_rois = [rois [mask ] for mask in masks ]
111
+ out_roi_indices = [roi_indices [mask ] for mask in masks ]
80
112
order = self .xp .argsort (
81
113
self .xp .concatenate ([self .xp .where (mask )[0 ] for mask in masks ]))
82
- return rois , roi_indices , order
114
+ return out_rois , out_roi_indices , order
83
115
84
116
def decode (self , segms , bboxes , labels , sizes ):
85
- # CPU is used because cv2.resize only accepts numpy arrays.
86
- segms = [chainer .backends .cuda .to_cpu (segm ) for segm in segms ]
87
- bboxes = [chainer .backends .cuda .to_cpu (bbox ) for bbox in bboxes ]
88
- labels = [chainer .backends .cuda .to_cpu (label ) for label in labels ]
89
-
117
+ """Decodes back to masks.
118
+
119
+ Args:
120
+ segms (iterable of arrays): An iterable of arrays of
121
+ shape :math:`(R_n, n\_class, M, M)`.
122
+ bboxes (iterable of arrays): An iterable of arrays of
123
+ shape :math:`(R_n, 4)`.
124
+ labels (iterable of arrays): An iterable of arrays of
125
+ shape :math:`(R_n,)`.
126
+ sizes (list of tuples of two ints): A list of
127
+ :math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n`
128
+ are height and width of the :math:`n`-th image.
129
+
130
+ Returns:
131
+ list of arrays:
132
+ This list contains instance segmentation for each image
133
+ in the batch.
134
+ More precisely, this is a list of boolean arrays of shape
135
+ :math:`(R'_n, H_n, W_n)`, where :math:`R'_n` is the number of
136
+ bounding boxes in the :math:`n`-th image.
137
+ """
138
+
139
+ xp = chainer .backends .cuda .get_array_module (* segms )
140
+ if xp != np :
141
+ raise ValueError (
142
+ 'MaskHead.decode only supports numpy inputs for now.' )
90
143
masks = []
91
144
# To work around an issue with cv2.resize (it seems to automatically
92
145
# pad with repeated border values), we manually zero-pad the masks by 1
@@ -101,7 +154,7 @@ def decode(self, segms, bboxes, labels, sizes):
101
154
img_H , img_W = size
102
155
mask = np .zeros ((len (bbox ), img_H , img_W ), dtype = np .bool )
103
156
104
- bbox = expand_boxes (bbox , cv2_expand_scale )
157
+ bbox = _expand_boxes (bbox , cv2_expand_scale )
105
158
for i , (bb , sgm , lbl ) in enumerate (zip (bbox , segm , label )):
106
159
bb = bb .astype (np .int32 )
107
160
padded_mask [1 :- 1 , 1 :- 1 ] = sgm [lbl + 1 ]
@@ -124,7 +177,7 @@ def decode(self, segms, bboxes, labels, sizes):
124
177
return masks
125
178
126
179
127
- def expand_boxes (bbox , scale ):
180
+ def _expand_boxes (bbox , scale ):
128
181
"""Expand an array of boxes by a given scale."""
129
182
xp = chainer .backends .cuda .get_array_module (bbox )
130
183
@@ -147,6 +200,42 @@ def expand_boxes(bbox, scale):
147
200
148
201
def mask_loss_pre (rois , roi_indices , gt_masks , gt_head_labels ,
149
202
mask_size ):
203
+ """Loss function for Mask Head (pre).
204
+
205
+ This function processes RoIs for :func:`mask_loss_post` by
206
+ selecting RoIs for mask loss calculation and
207
+ preparing ground truth network output.
208
+
209
+ Args:
210
+ rois (iterable of arrays): An iterable of arrays of
211
+ shape :math:`(R_l, 4)`, where :math:`R_l` is the number
212
+ of RoIs in the :math:`l`-th feature map.
213
+ roi_indices (iterable of arrays): An iterable of arrays of
214
+ shape :math:`(R_l,)`.
215
+ gt_masks (iterable of arrays): A list of arrays whose shape is
216
+ :math:`(R_n, H, W)`, where :math:`R_n` is the number of
217
+ ground truth objects.
218
+ gt_head_labels (iterable of arrays): An iterable of arrays of
219
+ shape :math:`(R_l,)`. This is a collection of ground-truth
220
+ labels assigned to :obj:`rois` during bounding box localization
221
+ stage. The range of value is :math:`(0, n\_class - 1)`.
222
+ mask_size (int): Size of the ground truth network output.
223
+
224
+ Returns:
225
+ tuple of four lists:
226
+ :obj:`mask_rois`, :obj:`mask_roi_indices`,
227
+ :obj:`gt_segms`, and :obj:`gt_mask_labels`.
228
+
229
+ * **mask_rois**: A list of arrays of shape :math:`(R'_l, 4)`, \
230
+ where :math:`R'_l` is the number of RoIs in the :math:`l`-th \
231
+ feature map.
232
+ * **mask_roi_indices**: A list of arrays of shape :math:`(R'_l,)`.
233
+ * **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M)`. \
234
+ :math:`M` is the argument :obj:`mask_size`.
235
+ * **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \
236
+ indicating the classes of ground truth.
237
+ """
238
+
150
239
xp = cuda .get_array_module (* rois )
151
240
152
241
n_level = len (rois )
@@ -172,7 +261,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
172
261
mask_roi = mask_rois [index ]
173
262
iou = bbox_iou (mask_roi , gt_bbox )
174
263
gt_index = iou .argmax (axis = 1 )
175
- gt_segms [index ] = segm_wrt_bbox (
264
+ gt_segms [index ] = _segm_wrt_bbox (
176
265
gt_mask [gt_index ], mask_roi , (mask_size , mask_size ))
177
266
178
267
flag_masks = [mask_roi_levels == l for l in range (n_level )]
@@ -185,8 +274,23 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
185
274
186
275
def mask_loss_post (segms , mask_roi_indices , gt_segms , gt_mask_labels ,
187
276
batchsize ):
188
- # Just compute loss for the foreground class
189
- # divide by the batchsize
277
+ """Loss function for Mask Head (post).
278
+
279
+ Args:
280
+ segms (array): An array whose shape is :math:`(R, n\_class, M, M)`,
281
+ where :math:`R` is the total number of RoIs in the given batch.
282
+ mask_roi_indices (list of arrays): A list of arrays returned by
283
+ :func:`mask_loss_pre`.
284
+ gt_segms (list of arrays): A list of arrays returned by
285
+ :func:`mask_loss_pre`.
286
+ gt_mask_labels (list of arrays): A list of arrays returned by
287
+ :func:`mask_loss_pre`.
288
+ batchsize (int): The size of batch.
289
+
290
+ Returns:
291
+ chainer.Variable:
292
+ Mask loss.
293
+ """
190
294
xp = cuda .get_array_module (segms .array )
191
295
192
296
mask_roi_indices = xp .hstack (mask_roi_indices ).astype (np .int32 )
@@ -206,7 +310,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
206
310
return mask_loss
207
311
208
312
209
- def segm_wrt_bbox (mask , bbox , size ):
313
+ def _segm_wrt_bbox (mask , bbox , size ):
210
314
xp = chainer .backends .cuda .get_array_module (mask )
211
315
212
316
bbox = bbox .astype (np .int32 )
0 commit comments