chainer
diff --git a/‎chainercv/links/model/mask_rcnn/mask_head.py
+118-14 b/‎chainercv/links/model/mask_rcnn/mask_head.py
+118-14
diff --git a/‎chainercv/links/model/mask_rcnn/mask_rcnn.py
+8-8 b/‎chainercv/links/model/mask_rcnn/mask_rcnn.py
+8-8
diff --git a/‎chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
+65 b/‎chainercv/links/model/mask_rcnn/mask_rcnn_fpn_resnet.py
+65
diff --git a/‎examples/mask_rcnn/demo.py
+8-13 b/‎examples/mask_rcnn/demo.py
+8-13
diff --git a/‎examples/mask_rcnn/train_multi.py
+5-4 b/‎examples/mask_rcnn/train_multi.py
+5-4
@@ -18,6 +18,14 @@
 
 class MaskHead(chainer.Chain):
 
+    """Mask Head network of Mask R-CNN.
+
+    Args:
+        n_class (int): The number of classes including background.
+        scales (tuple of floats): The scales of feature maps.
+
+    """
+
     _canonical_level = 2
     _canonical_scale = 224
     _roi_size = 14
@@ -67,6 +75,30 @@ def __call__(self, hs, rois, roi_indices):
         return self.seg(h)
 
     def distribute(self, rois, roi_indices):
+        """Assigns feature levels to RoIs based on their size.
+
+        Args:
+            rois (array): An array of shape :math:`(R, 4)`, \
+                where :math:`R` is the total number of RoIs in the given batch.
+            roi_indices (array): An array of shape :math:`(R,)`.
+
+        Returns:
+            two lists and one array:
+            :obj:`out_rois`, :obj:`out_roi_indices` and :obj:`order`.
+
+            * **out_rois**: A list of arrays of shape :math:`(R_l, 4)`, \
+                where :math:`R_l` is the number of RoIs in the :math:`l`-th \
+                feature map.
+            * **out_roi_indices**: A list of arrays of shape :math:`(R_l,)`.
+            * **order**: A correspondence between the output and the input. \
+                The relationship below is satisfied.
+
+            .. code:: python
+
+                xp.concatenate(out_rois, axis=0)[order[i]] == rois[i]
+
+        """
+
         size = self.xp.sqrt(self.xp.prod(rois[:, 2:] - rois[:, :2], axis=1))
         level = self.xp.floor(self.xp.log2(
             size / self._canonical_scale + 1e-6)).astype(np.int32)
@@ -75,18 +107,39 @@ def distribute(self, rois, roi_indices):
             level + self._canonical_level, 0, len(self._scales) - 2)
 
         masks = [level == l for l in range(len(self._scales))]
-        rois = [rois[mask] for mask in masks]
-        roi_indices = [roi_indices[mask] for mask in masks]
+        out_rois = [rois[mask] for mask in masks]
+        out_roi_indices = [roi_indices[mask] for mask in masks]
         order = self.xp.argsort(
             self.xp.concatenate([self.xp.where(mask)[0] for mask in masks]))
-        return rois, roi_indices, order
+        return out_rois, out_roi_indices, order
 
     def decode(self, segms, bboxes, labels, sizes):
-        # CPU is used because cv2.resize only accepts numpy arrays.
-        segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
-        bboxes = [chainer.backends.cuda.to_cpu(bbox) for bbox in bboxes]
-        labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
-
+        """Decodes back to masks.
+
+        Args:
+            segms (iterable of arrays): An iterable of arrays of
+                shape :math:`(R_n, n\_class, M, M)`.
+            bboxes (iterable of arrays): An iterable of arrays of
+                shape :math:`(R_n, 4)`.
+            labels (iterable of arrays): An iterable of arrays of
+                shape :math:`(R_n,)`.
+            sizes (list of tuples of two ints): A list of
+                :math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n`
+                are height and width of the :math:`n`-th image.
+
+        Returns:
+            list of arrays:
+            This list contains instance segmentation for each image
+            in the batch.
+            More precisely, this is a list of boolean arrays of shape
+            :math:`(R'_n, H_n, W_n)`, where :math:`R'_n` is the number of
+            bounding boxes in the :math:`n`-th image.
+        """
+
+        xp = chainer.backends.cuda.get_array_module(*segms)
+        if xp != np:
+            raise ValueError(
+                'MaskHead.decode only supports numpy inputs for now.')
         masks = []
         # To work around an issue with cv2.resize (it seems to automatically
         # pad with repeated border values), we manually zero-pad the masks by 1
@@ -101,7 +154,7 @@ def decode(self, segms, bboxes, labels, sizes):
             img_H, img_W = size
             mask = np.zeros((len(bbox), img_H, img_W), dtype=np.bool)
 
-            bbox = expand_boxes(bbox, cv2_expand_scale)
+            bbox = _expand_boxes(bbox, cv2_expand_scale)
             for i, (bb, sgm, lbl) in enumerate(zip(bbox, segm, label)):
                 bb = bb.astype(np.int32)
                 padded_mask[1:-1, 1:-1] = sgm[lbl + 1]
@@ -124,7 +177,7 @@ def decode(self, segms, bboxes, labels, sizes):
         return masks
 
 
-def expand_boxes(bbox, scale):
+def _expand_boxes(bbox, scale):
     """Expand an array of boxes by a given scale."""
     xp = chainer.backends.cuda.get_array_module(bbox)
 
@@ -147,6 +200,42 @@ def expand_boxes(bbox, scale):
 
 def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
                   mask_size):
+    """Loss function for Mask Head (pre).
+
+    This function processes RoIs for :func:`mask_loss_post` by
+    selecting RoIs for mask loss calculation and
+    preparing ground truth network output.
+
+    Args:
+        rois (iterable of arrays): An iterable of arrays of
+            shape :math:`(R_l, 4)`, where :math:`R_l` is the number
+            of RoIs in the :math:`l`-th feature map.
+        roi_indices (iterable of arrays): An iterable of arrays of
+            shape :math:`(R_l,)`.
+        gt_masks (iterable of arrays): A list of arrays whose shape is
+            :math:`(R_n, H, W)`, where :math:`R_n` is the number of
+            ground truth objects.
+        gt_head_labels (iterable of arrays): An iterable of arrays of
+            shape :math:`(R_l,)`. This is a collection of ground-truth
+            labels assigned to :obj:`rois` during bounding box localization
+            stage. The range of value is :math:`[0, n\_class - 1]`.
+        mask_size (int): Size of the ground truth network output.
+
+    Returns:
+        tuple of four lists:
+        :obj:`mask_rois`, :obj:`mask_roi_indices`,
+        :obj:`gt_segms`, and :obj:`gt_mask_labels`.
+
+        * **mask_rois**: A list of arrays of shape :math:`(R'_l, 4)`, \
+            where :math:`R'_l` is the number of RoIs in the :math:`l`-th \
+            feature map.
+        * **mask_roi_indices**: A list of arrays of shape :math:`(R'_l,)`.
+        * **gt_segms**: A list of arrays of shape :math:`(R'_l, M, M)`. \
+            :math:`M` is the argument :obj:`mask_size`.
+        * **gt_mask_labels**: A list of arrays of shape :math:`(R'_l,)` \
+            indicating the classes of ground truth.
+    """
+
     xp = cuda.get_array_module(*rois)
 
     n_level = len(rois)
@@ -172,7 +261,7 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
         mask_roi = mask_rois[index]
         iou = bbox_iou(mask_roi, gt_bbox)
         gt_index = iou.argmax(axis=1)
-        gt_segms[index] = segm_wrt_bbox(
+        gt_segms[index] = _segm_wrt_bbox(
             gt_mask[gt_index], mask_roi, (mask_size, mask_size))
 
     flag_masks = [mask_roi_levels == l for l in range(n_level)]
@@ -185,8 +274,23 @@ def mask_loss_pre(rois, roi_indices, gt_masks, gt_head_labels,
 
 def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
                    batchsize):
-    # Just compute loss for the foreground class
-    # divide by the batchsize
+    """Loss function for Mask Head (post).
+
+    Args:
+        segms (array): An array whose shape is :math:`(R, n\_class, M, M)`,
+            where :math:`R` is the total number of RoIs in the given batch.
+        mask_roi_indices (list of arrays): A list of arrays returned by
+            :func:`mask_loss_pre`.
+        gt_segms (list of arrays): A list of arrays returned by
+            :func:`mask_loss_pre`.
+        gt_mask_labels (list of arrays): A list of arrays returned by
+            :func:`mask_loss_pre`.
+        batchsize (int): The size of batch.
+
+    Returns:
+        chainer.Variable:
+        Mask loss.
+    """
     xp = cuda.get_array_module(segms.array)
 
     mask_roi_indices = xp.hstack(mask_roi_indices).astype(np.int32)
@@ -206,7 +310,7 @@ def mask_loss_post(segms, mask_roi_indices, gt_segms, gt_mask_labels,
     return mask_loss
 
 
-def segm_wrt_bbox(mask, bbox, size):
+def _segm_wrt_bbox(mask, bbox, size):
     xp = chainer.backends.cuda.get_array_module(mask)
 
     bbox = bbox.astype(np.int32)
 
@@ -1,7 +1,6 @@
 from __future__ import division
 
 import numpy as np
-import PIL
 
 import chainer
 from chainer.backends import cuda
@@ -153,13 +152,13 @@ def predict(self, imgs):
                      dtype=np.float32)
                  for segm in segms]
 
+        segms = [chainer.backends.cuda.to_cpu(segm) for segm in segms]
+        bboxes = [chainer.backends.cuda.to_cpu(bbox / scale)
+                  for bbox, scale in zip(rescaled_bboxes, scales)]
+        labels = [chainer.backends.cuda.to_cpu(label) for label in labels]
         masks = self.mask_head.decode(
-            segms,
-            [bbox / scale for bbox, scale in zip(rescaled_bboxes, scales)],
-            labels, sizes)
+            segms, bboxes, labels, sizes)
 
-        masks = [cuda.to_cpu(mask) for mask in masks]
-        labels = [cuda.to_cpu(label) for label in labels]
         scores = [cuda.to_cpu(score) for score in scores]
         return masks, labels, scores
 
@@ -172,8 +171,9 @@ def prepare(self, imgs, masks=None):
                 and the range of their value is :math:`[0, 255]`.
 
         Returns:
-            Two arrays: preprocessed images and \
-            scales that were caluclated in prepocessing.
+            Three arrays: preprocessed images, \
+            scales that were calculated in preprocessing and
+            the size of the images after resizing.
 
         """
         scales = []
 
@@ -17,6 +17,11 @@
 
 class MaskRCNNFPNResNet(MaskRCNN):
 
+    """Base class for Mask R-CNN with ResNet backbone.
+
+    A subclass of this class should have :obj:`_base` and :obj:`_models`.
+    """
+
     def __init__(self, n_fg_class=None, pretrained_model=None):
         param, path = utils.prepare_pretrained_model(
             {'n_fg_class': n_fg_class}, pretrained_model, self._models)
@@ -46,6 +51,36 @@ def __init__(self, n_fg_class=None, pretrained_model=None):
 
 class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
 
+    """Mask R-CNN with ResNet-50.
+
+    This is a model of Mask R-CNN [#]_.
+    This model uses :class:`~chainercv.links.ResNet50` as
+    its base feature extractor.
+
+    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
+
+    Args:
+       n_fg_class (int): The number of classes excluding the background.
+       pretrained_model (string): The weight file to be loaded.
+           This can take :obj:`'coco'`, :obj:`'imagenet'`, `filepath`
+           or :obj:`None`. The default value is :obj:`None`.
+
+            * :obj:`'coco'`: Load weights trained on train split of \
+                MS COCO 2017. \
+                The weight file is downloaded and cached automatically. \
+                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
+            * :obj:`'imagenet'`: Load weights of ResNet-50 trained on \
+                ImageNet. \
+                The weight file is downloaded and cached automatically. \
+                This option initializes weights partially and the rest are \
+                initialized randomly. In this case, :obj:`n_fg_class` \
+                can be set to any number.
+            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
+                must be specified properly.
+            * :obj:`None`: Do not load weights.
+
+    """
+
     _base = ResNet50
     _models = {
         'coco': {
@@ -58,6 +93,36 @@ class MaskRCNNFPNResNet50(MaskRCNNFPNResNet):
 
 class MaskRCNNFPNResNet101(MaskRCNNFPNResNet):
 
+    """Mask R-CNN with ResNet-101.
+
+    This is a model of Mask R-CNN [#]_.
+    This model uses :class:`~chainercv.links.ResNet101` as
+    its base feature extractor.
+
+    .. [#] Kaiming He et al. Mask R-CNN. ICCV 2017
+
+    Args:
+       n_fg_class (int): The number of classes excluding the background.
+       pretrained_model (string): The weight file to be loaded.
+           This can take :obj:`'coco'`, :obj:`'imagenet'`, `filepath`
+           or :obj:`None`. The default value is :obj:`None`.
+
+            * :obj:`'coco'`: Load weights trained on train split of \
+                MS COCO 2017. \
+                The weight file is downloaded and cached automatically. \
+                :obj:`n_fg_class` must be :obj:`80` or :obj:`None`.
+            * :obj:`'imagenet'`: Load weights of ResNet-101 trained on \
+                ImageNet. \
+                The weight file is downloaded and cached automatically. \
+                This option initializes weights partially and the rest are \
+                initialized randomly. In this case, :obj:`n_fg_class` \
+                can be set to any number.
+            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
+                must be specified properly.
+            * :obj:`None`: Do not load weights.
+
+    """
+
     _base = ResNet101
     _models = {
         'coco': {
 
@@ -14,18 +14,22 @@
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', type=int, default=-1)
-    parser.add_argument('--model', choices=('resnet50', 'resnet101'))
+    parser.add_argument(
+        '--model',
+        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+        default='mask_rcnn_fpn_resnet50'
+    )
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--pretrained-model')
     group.add_argument('--snapshot')
     parser.add_argument('image')
     args = parser.parse_args()
 
-    if args.model == 'resnet50':
+    if args.model == 'mask_rcnn_fpn_resnet50':
         model = MaskRCNNFPNResNet50(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model=args.pretrained_model)
-    elif args.model == 'resnet101':
+    elif args.model == 'mask_rcnn_fpn_resnet101':
         model = MaskRCNNFPNResNet101(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model=args.pretrained_model)
@@ -35,21 +39,12 @@ def main():
         model.to_gpu()
 
     img = utils.read_image(args.image)
-    # bboxes, masks, labels, scores = model.predict([img])
     masks, labels, scores = model.predict([img])
-    # bbox = bboxes[0]
     mask = masks[0]
     label = labels[0]
     score = scores[0]
-
-    # chainercv.visualizations.vis_bbox(
-    #     img, bbox, label, score, label_names=coco_bbox_label_names)
-
-    import numpy as np
-    # flag = np.array([bb[3] - bb[1] < 300 for bb in bbox], dtype=np.bool)
-    flag = np.ones(len(mask), dtype=np.bool)
     chainercv.visualizations.vis_instance_segmentation(
-        img, mask[flag], label[flag], score[flag],
+        img, mask, label, score,
         label_names=coco_instance_segmentation_label_names)
     plt.show()
 
 
@@ -143,8 +143,9 @@ def copyparams(dst, src):
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--model', choices=('resnet50', 'resnet101'),
-        default='resnet50')
+        '--model',
+        choices=('mask_rcnn_fpn_resnet50', 'mask_rcnn_fpn_resnet101'),
+        default='mask_rcnn_fpn_resnet50')
     parser.add_argument('--batchsize', type=int, default=16)
     parser.add_argument('--iteration', type=int, default=90000)
     parser.add_argument('--step', type=int, nargs='*', default=[60000, 80000])
@@ -163,11 +164,11 @@ def main():
     comm = chainermn.create_communicator(args.communicator)
     device = comm.intra_rank
 
-    if args.model == 'resnet50':
+    if args.model == 'mask_rcnn_fpn_resnet50':
         model = MaskRCNNFPNResNet50(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model='imagenet')
-    elif args.model == 'resnet101':
+    elif args.model == 'mask_rcnn_fpn_resnet101':
         model = MaskRCNNFPNResNet101(
             n_fg_class=len(coco_instance_segmentation_label_names),
             pretrained_model='imagenet')