From 6bebf480b8fd330ef949238270e03dad0275ecca Mon Sep 17 00:00:00 2001
From: zytx121 <592267829@qq.com>
Date: Fri, 19 Aug 2022 15:30:29 +0800
Subject: [PATCH 1/3] Add dota.py dataset config and rename coder to DeltaXYWHTRBBoxCoder

---
 configs/_base_/datasets/dota.py               | 88 ++++++++++++++++++
 configs/_base_/datasets/dotav1.py             | 47 ----------
 ...ated_retinanet_obb_r50_fpn_1x_dota_le90.py | 61 +++++-------
 ...otated_retinanet_obb_r50_fpn_1x_dota_oc.py | 92 +------------------
 mmrotate/core/bbox/__init__.py                |  4 +-
 mmrotate/core/bbox/coder/__init__.py          |  4 +-
 ...ox_coder.py => delta_xywht_rbbox_coder.py} | 62 +++++++------
 .../rotate_iou2d_calculator.py                |  3 +-
 8 files changed, 156 insertions(+), 205 deletions(-)
 create mode 100644 configs/_base_/datasets/dota.py
 delete mode 100644 configs/_base_/datasets/dotav1.py
 rename mmrotate/core/bbox/coder/{delta_xywha_obbox_coder.py => delta_xywht_rbbox_coder.py} (85%)

diff --git a/configs/_base_/datasets/dota.py b/configs/_base_/datasets/dota.py
new file mode 100644
index 000000000..95df7355b
--- /dev/null
+++ b/configs/_base_/datasets/dota.py
@@ -0,0 +1,88 @@
+# dataset settings
+dataset_type = 'DOTADataset'
+data_root = 'data/split_ss_dota/'
+file_client_args = dict(backend='disk')
+
+train_pipeline = [
+    dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'),
+    dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')),
+    dict(type='mmdet.Resize', scale=(1024, 2014), keep_ratio=True),
+    dict(type='mmdet.RandomFlip', prob=0.75, direction=['horizontal', 'vertical', 'diagonal']),
+    dict(type='mmdet.PackDetInputs')
+]
+val_pipeline = [
+    dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True),
+    # load annotations after Resize so the gt bboxes are not resized
+    dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'),
+    dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                   'scale_factor'))
+]
+test_pipeline = [
+    dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True),
+    # annotations are not loaded here, so there are no bboxes to resize
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                   'scale_factor'))
+]
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    batch_sampler=None,
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='trainval/annfiles/',
+        data_prefix=dict(img_path='trainval/images/'),
+        img_shape=(1024, 1024),
+        filter_cfg=dict(filter_empty_gt=True),
+        pipeline=train_pipeline))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='trainval/annfiles/',
+        data_prefix=dict(img_path='trainval/images/'),
+        img_shape=(1024, 1024),
+        test_mode=True,
+        pipeline=val_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+    type='DOTAMetric',
+    metric='mAP')
+test_evaluator = val_evaluator
+
+# inference on test dataset and
+# format the output results for submission.
+# test_dataloader = dict(
+#     batch_size=1,
+#     num_workers=2,
+#     persistent_workers=True,
+#     drop_last=False,
+#     sampler=dict(type='DefaultSampler', shuffle=False),
+#     dataset=dict(
+#         type=dataset_type,
+#         data_root=data_root,
+#         data_prefix=dict(img_path='test/images/'),
+#         img_shape=(1024, 1024),
+#         test_mode=True,
+#         pipeline=test_pipeline))
+# test_evaluator = dict(
+#     type='DOTAMetric',
+#     format_only=True,
+#     merge_patches=True,
+#     outfile_prefix='./work_dirs/dota/Task1')
diff --git a/configs/_base_/datasets/dotav1.py b/configs/_base_/datasets/dotav1.py
deleted file mode 100644
index c586d7bbb..000000000
--- a/configs/_base_/datasets/dotav1.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# dataset settings
-dataset_type = 'DOTADataset'
-data_root = 'data/split_1024_dota1_0/'
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='RResize', img_scale=(1024, 1024)),
-    dict(type='RRandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
-]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1024, 1024),
-        flip=False,
-        transforms=[
-            dict(type='RResize'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(type='DefaultFormatBundle'),
-            dict(type='Collect', keys=['img'])
-        ])
-]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
-        type=dataset_type,
-        ann_file=data_root + 'trainval/annfiles/',
-        img_prefix=data_root + 'trainval/images/',
-        pipeline=train_pipeline),
-    val=dict(
-        type=dataset_type,
-        ann_file=data_root + 'trainval/annfiles/',
-        img_prefix=data_root + 'trainval/images/',
-        pipeline=test_pipeline),
-    test=dict(
-        type=dataset_type,
-        ann_file=data_root + 'test/images/',
-        img_prefix=data_root + 'test/images/',
-        pipeline=test_pipeline))
diff --git a/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py b/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py
index 5e6e8ddb4..edf16ec84 100644
--- a/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py
+++ b/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py
@@ -1,65 +1,73 @@
 _base_ = [
-    '../_base_/datasets/dotav1.py', '../_base_/schedules/schedule_1x.py',
+    '../_base_/datasets/dota.py', '../_base_/schedules/schedule_1x.py',
     '../_base_/default_runtime.py'
 ]
-
 angle_version = 'le90'
+
 model = dict(
     type='RotatedRetinaNet',
+    data_preprocessor=dict(
+        type='mmdet.DetDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        pad_size_divisor=32,
+        with_box_wrapped=True),
     backbone=dict(
-        type='ResNet',
+        type='mmdet.ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        zero_init_residual=False,
         norm_cfg=dict(type='BN', requires_grad=True),
         norm_eval=True,
         style='pytorch',
         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
     neck=dict(
-        type='FPN',
+        type='mmdet.FPN',
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
         add_extra_convs='on_input',
         num_outs=5),
     bbox_head=dict(
-        type='RotatedRetinaHead',
+        type='mmdet.RetinaHead',
         num_classes=15,
         in_channels=256,
         stacked_convs=4,
         feat_channels=256,
-        assign_by_circumhbbox=None,
         anchor_generator=dict(
-            type='RotatedAnchorGenerator',
+            type='FakeRotatedAnchorGenerator',
+            angle_version=angle_version,
             octave_base_scale=4,
             scales_per_octave=3,
             ratios=[1.0, 0.5, 2.0],
             strides=[8, 16, 32, 64, 128]),
         bbox_coder=dict(
-            type='DeltaXYWHAOBBoxCoder',
-            angle_range=angle_version,
+            type='DeltaXYWHTRBBoxCoder',
+            angle_version=angle_version,
             norm_factor=None,
-            edge_swap=True,
-            proj_xy=True,
+            edge_swap=False,
+            proj_xy=False,
             target_means=(.0, .0, .0, .0, .0),
             target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+        loss_bbox=dict(type='mmdet.L1Loss', loss_weight=1.0)),
     train_cfg=dict(
         assigner=dict(
-            type='MaxIoUAssigner',
+            type='mmdet.MaxIoUAssigner',
             pos_iou_thr=0.5,
             neg_iou_thr=0.4,
             min_pos_iou=0,
             ignore_iof_thr=-1,
             iou_calculator=dict(type='RBboxOverlaps2D')),
+        sampler=dict(
+            type='mmdet.PseudoSampler'),  # Focal loss should use PseudoSampler
         allowed_border=-1,
         pos_weight=-1,
         debug=False),
@@ -67,26 +75,5 @@
         nms_pre=2000,
         min_bbox_size=0,
         score_thr=0.05,
-        nms=dict(iou_thr=0.1),
+        nms=dict(type='nms_rotated', iou_threshold=0.1),
         max_per_img=2000))
-
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='RResize', img_scale=(1024, 1024)),
-    dict(
-        type='RRandomFlip',
-        flip_ratio=[0.25, 0.25, 0.25],
-        direction=['horizontal', 'vertical', 'diagonal'],
-        version=angle_version),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
-]
-data = dict(
-    train=dict(pipeline=train_pipeline, version=angle_version),
-    val=dict(version=angle_version),
-    test=dict(version=angle_version))
diff --git a/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_oc.py b/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_oc.py
index 1a729de45..e9d2a5154 100644
--- a/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_oc.py
+++ b/configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_oc.py
@@ -1,92 +1,8 @@
-_base_ = [
-    '../_base_/datasets/dotav1.py', '../_base_/schedules/schedule_1x.py',
-    '../_base_/default_runtime.py'
-]
+_base_ = 'rotated_retinanet_obb_r50_fpn_1x_dota_le90.py'
 
 angle_version = 'oc'
+
 model = dict(
-    type='RotatedRetinaNet',
-    backbone=dict(
-        type='ResNet',
-        depth=50,
-        num_stages=4,
-        out_indices=(0, 1, 2, 3),
-        frozen_stages=1,
-        zero_init_residual=False,
-        norm_cfg=dict(type='BN', requires_grad=True),
-        norm_eval=True,
-        style='pytorch',
-        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
-    neck=dict(
-        type='FPN',
-        in_channels=[256, 512, 1024, 2048],
-        out_channels=256,
-        start_level=1,
-        add_extra_convs='on_input',
-        num_outs=5),
     bbox_head=dict(
-        type='RotatedRetinaHead',
-        num_classes=15,
-        in_channels=256,
-        stacked_convs=4,
-        feat_channels=256,
-        assign_by_circumhbbox=None,
-        anchor_generator=dict(
-            type='RotatedAnchorGenerator',
-            octave_base_scale=4,
-            scales_per_octave=3,
-            ratios=[1.0, 0.5, 2.0],
-            strides=[8, 16, 32, 64, 128]),
-        bbox_coder=dict(
-            type='DeltaXYWHAOBBoxCoder',
-            angle_range=angle_version,
-            norm_factor=None,
-            edge_swap=False,
-            proj_xy=False,
-            target_means=(.0, .0, .0, .0, .0),
-            target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)),
-        loss_cls=dict(
-            type='FocalLoss',
-            use_sigmoid=True,
-            gamma=2.0,
-            alpha=0.25,
-            loss_weight=1.0),
-        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
-    train_cfg=dict(
-        assigner=dict(
-            type='MaxIoUAssigner',
-            pos_iou_thr=0.5,
-            neg_iou_thr=0.4,
-            min_pos_iou=0,
-            ignore_iof_thr=-1,
-            iou_calculator=dict(type='RBboxOverlaps2D')),
-        allowed_border=-1,
-        pos_weight=-1,
-        debug=False),
-    test_cfg=dict(
-        nms_pre=2000,
-        min_bbox_size=0,
-        score_thr=0.05,
-        nms=dict(iou_thr=0.1),
-        max_per_img=2000))
-
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='RResize', img_scale=(1024, 1024)),
-    dict(
-        type='RRandomFlip',
-        flip_ratio=[0.25, 0.25, 0.25],
-        direction=['horizontal', 'vertical', 'diagonal'],
-        version=angle_version),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
-]
-data = dict(
-    train=dict(pipeline=train_pipeline, version=angle_version),
-    val=dict(version=angle_version),
-    test=dict(version=angle_version))
+        anchor_generator=dict(angle_version=angle_version),
+        bbox_coder=dict(angle_version=angle_version)))
diff --git a/mmrotate/core/bbox/__init__.py b/mmrotate/core/bbox/__init__.py
index daf6d5a43..198e0cb3e 100644
--- a/mmrotate/core/bbox/__init__.py
+++ b/mmrotate/core/bbox/__init__.py
@@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .assigners import (ATSSKldAssigner, ATSSObbAssigner, ConvexAssigner,
                         MaxConvexIoUAssigner, SASAssigner)
-from .coder import (CSLCoder, DeltaXYWHAHBBoxCoder, DeltaXYWHAOBBoxCoder,
+from .coder import (CSLCoder, DeltaXYWHAHBBoxCoder, DeltaXYWHTRBBoxCoder,
                     GVFixCoder, GVRatioCoder, MidpointOffsetCoder)
 from .iou_calculators import RBboxOverlaps2D, rbbox_overlaps
 from .samplers import RRandomSampler
@@ -14,7 +14,7 @@
 __all__ = [
     'RBboxOverlaps2D', 'rbbox_overlaps', 'rbbox2result', 'rbbox2roi',
     'norm_angle', 'poly2obb', 'poly2obb_np', 'obb2poly', 'obb2hbb', 'obb2xyxy',
-    'hbb2obb', 'obb2poly_np', 'RRandomSampler', 'DeltaXYWHAOBBoxCoder',
+    'hbb2obb', 'obb2poly_np', 'RRandomSampler', 'DeltaXYWHTRBBoxCoder',
     'DeltaXYWHAHBBoxCoder', 'MidpointOffsetCoder', 'GVFixCoder',
     'GVRatioCoder', 'ConvexAssigner', 'MaxConvexIoUAssigner', 'SASAssigner',
     'ATSSKldAssigner', 'gaussian2bbox', 'gt2gaussian', 'GaussianMixture',
diff --git a/mmrotate/core/bbox/coder/__init__.py b/mmrotate/core/bbox/coder/__init__.py
index af6c1c74d..3ad9bef0e 100644
--- a/mmrotate/core/bbox/coder/__init__.py
+++ b/mmrotate/core/bbox/coder/__init__.py
@@ -2,11 +2,11 @@
 from .angle_coder import CSLCoder
 from .delta_midpointoffset_rbbox_coder import MidpointOffsetCoder
 from .delta_xywha_hbbox_coder import DeltaXYWHAHBBoxCoder
-from .delta_xywha_obbox_coder import DeltaXYWHAOBBoxCoder
+from .delta_xywht_rbbox_coder import DeltaXYWHTRBBoxCoder
 from .distance_angle_point_coder import DistanceAnglePointCoder
 from .gliding_vertex_coder import GVFixCoder, GVRatioCoder
 
 __all__ = [
-    'DeltaXYWHAOBBoxCoder', 'DeltaXYWHAHBBoxCoder', 'MidpointOffsetCoder',
+    'DeltaXYWHTRBBoxCoder', 'DeltaXYWHAHBBoxCoder', 'MidpointOffsetCoder',
     'GVFixCoder', 'GVRatioCoder', 'CSLCoder', 'DistanceAnglePointCoder'
 ]
diff --git a/mmrotate/core/bbox/coder/delta_xywha_obbox_coder.py b/mmrotate/core/bbox/coder/delta_xywht_rbbox_coder.py
similarity index 85%
rename from mmrotate/core/bbox/coder/delta_xywha_obbox_coder.py
rename to mmrotate/core/bbox/coder/delta_xywht_rbbox_coder.py
index 04b4a79c5..8b402c593 100644
--- a/mmrotate/core/bbox/coder/delta_xywha_obbox_coder.py
+++ b/mmrotate/core/bbox/coder/delta_xywht_rbbox_coder.py
@@ -4,23 +4,24 @@
 import torch
 from mmdet.models.task_modules.coders.base_bbox_coder import BaseBBoxCoder
 
+from mmrotate.core.bbox.structures import RotatedBoxes
 from mmrotate.registry import TASK_UTILS
-from ..transforms import norm_angle
+from mmrotate.structures.bbox import norm_angle
 
 
 @TASK_UTILS.register_module()
-class DeltaXYWHAOBBoxCoder(BaseBBoxCoder):
-    """Delta XYWHA OBBox coder. This coder is used for rotated objects
+class DeltaXYWHTRBBoxCoder(BaseBBoxCoder):
+    """Delta XYWHT RBBox coder. This coder is used for rotated objects
     detection (for example on task1 of DOTA dataset). this coder encodes bbox
-    (xc, yc, w, h, a) into delta (dx, dy, dw, dh, da) and decodes delta (dx,
-    dy, dw, dh, da) back to original bbox (xc, yc, w, h, a).
+    (xc, yc, w, h, t) into delta (dx, dy, dw, dh, da) and decodes delta (dx,
+    dy, dw, dh, da) back to original bbox (xc, yc, w, h, t).
 
     Args:
         target_means (Sequence[float]): Denormalizing means of target for
             delta coordinates
         target_stds (Sequence[float]): Denormalizing standard deviation of
             target for delta coordinates
-        angle_range (str, optional): Angle representations. Defaults to 'oc'.
+        angle_version (str, optional): Angle representations. Defaults to 'oc'.
         norm_factor (None|float, optional): Regularization factor of angle.
         edge_swap (bool, optional): Whether swap the edge if w < h.
             Defaults to False.
@@ -32,22 +33,24 @@ class DeltaXYWHAOBBoxCoder(BaseBBoxCoder):
         ctr_clamp (int): the maximum pixel shift to clamp. Only used by
             YOLOF. Default 32.
     """
+    encode_bbox_dim = 5
+    decode_bbox_dim = 5
 
     def __init__(self,
                  target_means=(0., 0., 0., 0., 0.),
                  target_stds=(1., 1., 1., 1., 1.),
-                 angle_range='oc',
+                 angle_version='oc',
                  norm_factor=None,
                  edge_swap=False,
                  proj_xy=False,
                  add_ctr_clamp=False,
                  ctr_clamp=32):
-        super(BaseBBoxCoder, self).__init__()
+        super().__init__()
         self.means = target_means
         self.stds = target_stds
         self.add_ctr_clamp = add_ctr_clamp
         self.ctr_clamp = ctr_clamp
-        self.angle_range = angle_range
+        self.angle_version = angle_version
         self.norm_factor = norm_factor
         self.edge_swap = edge_swap
         self.proj_xy = proj_xy
@@ -67,9 +70,9 @@ def encode(self, bboxes, gt_bboxes):
         assert bboxes.size(0) == gt_bboxes.size(0)
         assert bboxes.size(-1) == 5
         assert gt_bboxes.size(-1) == 5
-        if self.angle_range in ['oc', 'le135', 'le90']:
+        if self.angle_version in ['oc', 'le135', 'le90']:
             return bbox2delta(bboxes, gt_bboxes, self.means, self.stds,
-                              self.angle_range, self.norm_factor,
+                              self.angle_version, self.norm_factor,
                               self.edge_swap, self.proj_xy)
         else:
             raise NotImplementedError
@@ -99,10 +102,10 @@ def decode(self,
             torch.Tensor: Decoded boxes.
         """
         assert pred_bboxes.size(0) == bboxes.size(0)
-        if self.angle_range in ['oc', 'le135', 'le90']:
+        if self.angle_version in ['oc', 'le135', 'le90']:
             return delta2bbox(bboxes, pred_bboxes, self.means, self.stds,
                               max_shape, wh_ratio_clip, self.add_ctr_clamp,
-                              self.ctr_clamp, self.angle_range,
+                              self.ctr_clamp, self.angle_version,
                               self.norm_factor, self.edge_swap, self.proj_xy)
         else:
             raise NotImplementedError
@@ -113,7 +116,7 @@ def bbox2delta(proposals,
                gt,
                means=(0., 0., 0., 0., 0.),
                stds=(1., 1., 1., 1., 1.),
-               angle_range='oc',
+               angle_version='oc',
                norm_factor=None,
                edge_swap=False,
                proj_xy=False):
@@ -127,7 +130,7 @@ def bbox2delta(proposals,
         means (Sequence[float]): Denormalizing means for delta coordinates
         stds (Sequence[float]): Denormalizing standard deviation for delta
             coordinates.
-        angle_range (str, optional): Angle representations. Defaults to 'oc'.
+        angle_version (str, optional): Angle representations. Defaults to 'oc'.
         norm_factor (None|float, optional): Regularization factor of angle.
         edge_swap (bool, optional): Whether swap the edge if w < h.
             Defaults to False.
@@ -139,6 +142,8 @@ def bbox2delta(proposals,
             dw, dh, da.
     """
     assert proposals.size() == gt.size()
+    proposals = proposals.tensor
+    gt = gt.regularize_boxes(angle_version)
     proposals = proposals.float()
     gt = gt.float()
     px, py, pw, ph, pa = proposals.unbind(dim=-1)
@@ -152,8 +157,8 @@ def bbox2delta(proposals,
         dy = (gy - py) / ph
 
     if edge_swap:
-        dtheta1 = norm_angle(ga - pa, angle_range)
-        dtheta2 = norm_angle(ga - pa + np.pi / 2, angle_range)
+        dtheta1 = norm_angle(ga - pa, angle_version)
+        dtheta2 = norm_angle(ga - pa + np.pi / 2, angle_version)
         abs_dtheta1 = torch.abs(dtheta1)
         abs_dtheta2 = torch.abs(dtheta2)
         gw_regular = torch.where(abs_dtheta1 < abs_dtheta2, gw, gh)
@@ -162,7 +167,7 @@ def bbox2delta(proposals,
         dw = torch.log(gw_regular / pw)
         dh = torch.log(gh_regular / ph)
     else:
-        da = norm_angle(ga - pa, angle_range)
+        da = norm_angle(ga - pa, angle_version)
         dw = torch.log(gw / pw)
         dh = torch.log(gh / ph)
 
@@ -185,7 +190,7 @@ def delta2bbox(rois,
                wh_ratio_clip=16 / 1000,
                add_ctr_clamp=False,
                ctr_clamp=32,
-               angle_range='oc',
+               angle_version='oc',
                norm_factor=None,
                edge_swap=False,
                proj_xy=False):
@@ -216,7 +221,7 @@ def delta2bbox(rois,
             the original anchor's center. Only used by YOLOF. Default False.
         ctr_clamp (int): the maximum pixel shift to clamp. Only used by
             YOLOF. Default 32.
-        angle_range (str, optional): Angle representations. Defaults to 'oc'.
+        angle_version (str, optional): Angle representations. Defaults to 'oc'.
         norm_factor (None|float, optional): Regularization factor of angle.
         edge_swap (bool, optional): Whether swap the edge if w < h.
             Defaults to False.
@@ -227,8 +232,8 @@ def delta2bbox(rois,
         Tensor: Boxes with shape (N, num_classes * 5) or (N, 5), where 5
            represent cx, cy, w, h, a.
     """
-    means = deltas.new_tensor(means).view(1, -1).repeat(1, deltas.size(1) // 5)
-    stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(1) // 5)
+    means = deltas.new_tensor(means).view(1, -1)
+    stds = deltas.new_tensor(stds).view(1, -1)
     denorm_deltas = deltas * stds + means
     dx = denorm_deltas[:, 0::5]
     dy = denorm_deltas[:, 1::5]
@@ -267,7 +272,7 @@ def delta2bbox(rois,
         gx = px + dx_width
         gy = py + dy_height
     # Compute angle
-    ga = norm_angle(pa + da, angle_range)
+    ga = norm_angle(pa + da, angle_version)
     if max_shape is not None:
         gx = gx.clamp(min=0, max=max_shape[1] - 1)
         gy = gy.clamp(min=0, max=max_shape[0] - 1)
@@ -276,8 +281,11 @@ def delta2bbox(rois,
         w_regular = torch.where(gw > gh, gw, gh)
         h_regular = torch.where(gw > gh, gh, gw)
         theta_regular = torch.where(gw > gh, ga, ga + np.pi / 2)
-        theta_regular = norm_angle(theta_regular, angle_range)
-        return torch.stack([gx, gy, w_regular, h_regular, theta_regular],
-                           dim=-1).view_as(deltas)
+        theta_regular = norm_angle(theta_regular, angle_version)
+        decoded_bbox = torch.stack(
+            [gx, gy, w_regular, h_regular, theta_regular],
+            dim=-1).view_as(deltas)
     else:
-        return torch.stack([gx, gy, gw, gh, ga], dim=-1).view(deltas.size())
+        decoded_bbox = torch.stack([gx, gy, gw, gh, ga],
+                                   dim=-1).view(deltas.size())
+    return RotatedBoxes(decoded_bbox)
diff --git a/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py b/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py
index 9996347eb..f595b2a5e 100644
--- a/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py
+++ b/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py
@@ -41,8 +41,7 @@ def __call__(self,
             bboxes2 = bboxes2[..., :5]
         if bboxes1.size(-1) == 6:
             bboxes1 = bboxes1[..., :5]
-        return rbbox_overlaps(bboxes1.contiguous(), bboxes2.contiguous(), mode,
-                              is_aligned)
+        return rbbox_overlaps(bboxes1.tensor, bboxes2.tensor, mode, is_aligned)
 
     def __repr__(self):
         """str: a string describing the module"""

From 7741386f02bd4440664c779ae53943d1442a996c Mon Sep 17 00:00:00 2001
From: zytx121 <592267829@qq.com>
Date: Fri, 19 Aug 2022 15:40:43 +0800
Subject: [PATCH 2/3] delete RotatedAnchorHead

---
 configs/_base_/datasets/dota.py               |   9 +-
 mmrotate/models/dense_heads/__init__.py       |  13 +-
 .../models/dense_heads/rotated_anchor_head.py | 787 ------------------
 .../models/dense_heads/rotated_retina_head.py |   4 +-
 4 files changed, 13 insertions(+), 800 deletions(-)
 delete mode 100644 mmrotate/models/dense_heads/rotated_anchor_head.py

diff --git a/configs/_base_/datasets/dota.py b/configs/_base_/datasets/dota.py
index 95df7355b..a6d839fef 100644
--- a/configs/_base_/datasets/dota.py
+++ b/configs/_base_/datasets/dota.py
@@ -8,7 +8,10 @@
     dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'),
     dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')),
     dict(type='mmdet.Resize', scale=(1024, 2014), keep_ratio=True),
-    dict(type='mmdet.RandomFlip', prob=0.75, direction=['horizontal', 'vertical', 'diagonal']),
+    dict(
+        type='mmdet.RandomFlip',
+        prob=0.75,
+        direction=['horizontal', 'vertical', 'diagonal']),
     dict(type='mmdet.PackDetInputs')
 ]
 val_pipeline = [
@@ -61,9 +64,7 @@
         pipeline=val_pipeline))
 test_dataloader = val_dataloader
 
-val_evaluator = dict(
-    type='DOTAMetric',
-    metric='mAP')
+val_evaluator = dict(type='DOTAMetric', metric='mAP')
 test_evaluator = val_evaluator
 
 # inference on test dataset and
diff --git a/mmrotate/models/dense_heads/__init__.py b/mmrotate/models/dense_heads/__init__.py
index 7e6b2c0fd..556907c9d 100644
--- a/mmrotate/models/dense_heads/__init__.py
+++ b/mmrotate/models/dense_heads/__init__.py
@@ -8,7 +8,6 @@
 from .oriented_reppoints_head import OrientedRepPointsHead
 from .oriented_rpn_head import OrientedRPNHead
 from .rotated_anchor_free_head import RotatedAnchorFreeHead
-from .rotated_anchor_head import RotatedAnchorHead
 from .rotated_atss_head import RotatedATSSHead
 from .rotated_fcos_head import RotatedFCOSHead
 from .rotated_reppoints_head import RotatedRepPointsHead
@@ -18,10 +17,10 @@
 from .sam_reppoints_head import SAMRepPointsHead
 
 __all__ = [
-    'RotatedAnchorHead', 'RotatedRetinaHead', 'RotatedRPNHead',
-    'OrientedRPNHead', 'RotatedRetinaRefineHead', 'ODMRefineHead',
-    'KFIoURRetinaHead', 'KFIoURRetinaRefineHead', 'KFIoUODMRefineHead',
-    'RotatedRepPointsHead', 'SAMRepPointsHead', 'CSLRRetinaHead',
-    'RotatedATSSHead', 'RotatedAnchorFreeHead', 'RotatedFCOSHead',
-    'CSLRFCOSHead', 'OrientedRepPointsHead'
+    'RotatedRetinaHead', 'RotatedRPNHead', 'OrientedRPNHead',
+    'RotatedRetinaRefineHead', 'ODMRefineHead', 'KFIoURRetinaHead',
+    'KFIoURRetinaRefineHead', 'KFIoUODMRefineHead', 'RotatedRepPointsHead',
+    'SAMRepPointsHead', 'CSLRRetinaHead', 'RotatedATSSHead',
+    'RotatedAnchorFreeHead', 'RotatedFCOSHead', 'CSLRFCOSHead',
+    'OrientedRepPointsHead'
 ]
diff --git a/mmrotate/models/dense_heads/rotated_anchor_head.py b/mmrotate/models/dense_heads/rotated_anchor_head.py
deleted file mode 100644
index 6aed0623c..000000000
--- a/mmrotate/models/dense_heads/rotated_anchor_head.py
+++ /dev/null
@@ -1,787 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-from inspect import signature
-
-import torch
-import torch.nn as nn
-from mmcv.runner import force_fp32
-from mmdet.core import images_to_levels, multi_apply, unmap
-from mmdet.models.dense_heads.base_dense_head import BaseDenseHead
-
-from mmrotate.core import (aug_multiclass_nms_rotated, bbox_mapping_back,
-                           build_assigner, build_bbox_coder,
-                           build_prior_generator, build_sampler,
-                           multiclass_nms_rotated, obb2hbb,
-                           rotated_anchor_inside_flags)
-from ..builder import ROTATED_HEADS, build_loss
-
-
-@ROTATED_HEADS.register_module()
-class RotatedAnchorHead(BaseDenseHead):
-    """Rotated Anchor-based head (RotatedRPN, RotatedRetinaNet, etc.).
-
-    Args:
-        num_classes (int): Number of categories excluding the background
-            category.
-        in_channels (int): Number of channels in the input feature map.
-        feat_channels (int): Number of hidden channels. Used in child classes.
-        anchor_generator (dict): Config dict for anchor generator
-        bbox_coder (dict): Config of bounding box coder.
-        reg_decoded_bbox (bool): If true, the regression loss would be
-            applied on decoded bounding boxes. Default: False
-        assign_by_circumhbbox (str): If None, assigner will assign according to
-            the IoU between anchor and GT (OBB), called RetinaNet-OBB.
-            If angle definition method, assigner will assign according to the
-            IoU between anchor and GT's circumbox (HBB), called RetinaNet-HBB.
-        loss_cls (dict): Config of classification loss.
-        loss_bbox (dict): Config of localization loss.
-        train_cfg (dict): Training config of anchor head.
-        test_cfg (dict): Testing config of anchor head.
-        init_cfg (dict or list[dict], optional): Initialization config dict.
-    """  # noqa: W605
-
-    def __init__(self,
-                 num_classes,
-                 in_channels,
-                 feat_channels=256,
-                 anchor_generator=dict(
-                     type='RotatedAnchorGenerator',
-                     octave_base_scale=4,
-                     scales_per_octave=3,
-                     ratios=[1.0, 0.5, 2.0],
-                     strides=[8, 16, 32, 64, 128]),
-                 bbox_coder=dict(
-                     type='DeltaXYWHAOBBoxCoder',
-                     target_means=(.0, .0, .0, .0, .0),
-                     target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)),
-                 reg_decoded_bbox=False,
-                 assign_by_circumhbbox='oc',
-                 loss_cls=dict(
-                     type='FocalLoss',
-                     use_sigmoid=True,
-                     gamma=2.0,
-                     alpha=0.25,
-                     loss_weight=1.0),
-                 loss_bbox=dict(type='L1Loss', loss_weight=1.0),
-                 train_cfg=None,
-                 test_cfg=None,
-                 init_cfg=dict(type='Normal', layer='Conv2d', std=0.01)):
-        super(RotatedAnchorHead, self).__init__(init_cfg)
-        self.in_channels = in_channels
-        self.num_classes = num_classes
-        self.feat_channels = feat_channels
-        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
-        # TODO better way to determine whether sample or not
-        self.sampling = loss_cls['type'] not in [
-            'FocalLoss', 'GHMC', 'QualityFocalLoss'
-        ]
-        if self.use_sigmoid_cls:
-            self.cls_out_channels = num_classes
-        else:
-            self.cls_out_channels = num_classes + 1
-
-        if self.cls_out_channels <= 0:
-            raise ValueError(f'num_classes={num_classes} is too small')
-        self.reg_decoded_bbox = reg_decoded_bbox
-        self.assign_by_circumhbbox = assign_by_circumhbbox
-        self.bbox_coder = build_bbox_coder(bbox_coder)
-        self.loss_cls = build_loss(loss_cls)
-        self.loss_bbox = build_loss(loss_bbox)
-        self.train_cfg = train_cfg
-        self.test_cfg = test_cfg
-        if self.train_cfg:
-            self.assigner = build_assigner(self.train_cfg.assigner)
-            # use PseudoSampler when sampling is False
-            if self.sampling and hasattr(self.train_cfg, 'sampler'):
-                sampler_cfg = self.train_cfg.sampler
-            else:
-                sampler_cfg = dict(type='PseudoSampler')
-            self.sampler = build_sampler(sampler_cfg, context=self)
-        self.fp16_enabled = False
-
-        self.anchor_generator = build_prior_generator(anchor_generator)
-        # usually the numbers of anchors for each level are the same
-        # except SSD detectors
-        self.num_anchors = self.anchor_generator.num_base_anchors[0]
-        self._init_layers()
-
-    def _init_layers(self):
-        """Initialize layers of the head."""
-        self.conv_cls = nn.Conv2d(self.in_channels,
-                                  self.num_anchors * self.cls_out_channels, 1)
-        self.conv_reg = nn.Conv2d(self.in_channels, self.num_anchors * 5, 1)
-
-    def forward_single(self, x):
-        """Forward feature of a single scale level.
-
-        Args:
-            x (torch.Tensor): Features of a single scale level.
-
-        Returns:
-            tuple (torch.Tensor):
-
-                - cls_score (torch.Tensor): Cls scores for a single scale \
-                    level the channels number is num_anchors * num_classes.
-                - bbox_pred (torch.Tensor): Box energies / deltas for a \
-                    single scale level, the channels number is num_anchors * 5.
-        """
-        cls_score = self.conv_cls(x)
-        bbox_pred = self.conv_reg(x)
-        return cls_score, bbox_pred
-
-    def forward(self, feats):
-        """Forward features from the upstream network.
-
-        Args:
-            feats (tuple[Tensor]): Features from the upstream network, each is
-                a 4D-tensor.
-
-        Returns:
-            tuple: A tuple of classification scores and bbox prediction.
-
-                - cls_scores (list[Tensor]): Classification scores for all \
-                    scale levels, each is a 4D-tensor, the channels number \
-                    is num_anchors * num_classes.
-                - bbox_preds (list[Tensor]): Box energies / deltas for all \
-                    scale levels, each is a 4D-tensor, the channels number \
-                    is num_anchors * 5.
-        """
-        return multi_apply(self.forward_single, feats)
-
-    def get_anchors(self, featmap_sizes, img_metas, device='cuda'):
-        """Get anchors according to feature map sizes.
-
-        Args:
-            featmap_sizes (list[tuple]): Multi-level feature map sizes.
-            img_metas (list[dict]): Image meta info.
-            device (torch.device | str): Device for returned tensors
-
-        Returns:
-            tuple (list[Tensor]):
-
-                - anchor_list (list[Tensor]): Anchors of each image.
-                - valid_flag_list (list[Tensor]): Valid flags of each image.
-        """
-        num_imgs = len(img_metas)
-
-        # since feature map sizes of all images are the same, we only compute
-        # anchors for one time
-        multi_level_anchors = self.anchor_generator.grid_priors(
-            featmap_sizes, device)
-        anchor_list = [multi_level_anchors for _ in range(num_imgs)]
-
-        # for each image, we compute valid flags of multi level anchors
-        valid_flag_list = []
-        for img_id, img_meta in enumerate(img_metas):
-            multi_level_flags = self.anchor_generator.valid_flags(
-                featmap_sizes, img_meta['pad_shape'], device)
-            valid_flag_list.append(multi_level_flags)
-
-        return anchor_list, valid_flag_list
-
-    def _get_targets_single(self,
-                            flat_anchors,
-                            valid_flags,
-                            gt_bboxes,
-                            gt_bboxes_ignore,
-                            gt_labels,
-                            img_meta,
-                            label_channels=1,
-                            unmap_outputs=True):
-        """Compute regression and classification targets for anchors in a
-        single image.
-
-        Args:
-            flat_anchors (torch.Tensor): Multi-level anchors of the image,
-                which are concatenated into a single tensor of shape
-                (num_anchors, 5)
-            valid_flags (torch.Tensor): Multi level valid flags of the image,
-                which are concatenated into a single tensor of
-                    shape (num_anchors,).
-            gt_bboxes (torch.Tensor): Ground truth bboxes of the image,
-                shape (num_gts, 5).
-            img_meta (dict): Meta info of the image.
-            gt_bboxes_ignore (torch.Tensor): Ground truth bboxes to be
-                ignored, shape (num_ignored_gts, 5).
-            img_meta (dict): Meta info of the image.
-            gt_labels (torch.Tensor): Ground truth labels of each box,
-                shape (num_gts,).
-            label_channels (int): Channel of label.
-            unmap_outputs (bool): Whether to map outputs back to the original
-                set of anchors.
-
-        Returns:
-            tuple (list[Tensor]):
-
-                - labels_list (list[Tensor]): Labels of each level
-                - label_weights_list (list[Tensor]): Label weights of each \
-                  level
-                - bbox_targets_list (list[Tensor]): BBox targets of each level
-                - bbox_weights_list (list[Tensor]): BBox weights of each level
-                - num_total_pos (int): Number of positive samples in all images
-                - num_total_neg (int): Number of negative samples in all images
-        """
-        inside_flags = rotated_anchor_inside_flags(
-            flat_anchors, valid_flags, img_meta['img_shape'][:2],
-            self.train_cfg.allowed_border)
-        if not inside_flags.any():
-            return (None, ) * 7
-        # assign gt and sample anchors
-        anchors = flat_anchors[inside_flags, :]
-
-        if self.assign_by_circumhbbox is not None:
-            gt_bboxes_assign = obb2hbb(gt_bboxes, self.assign_by_circumhbbox)
-            assign_result = self.assigner.assign(
-                anchors, gt_bboxes_assign, gt_bboxes_ignore,
-                None if self.sampling else gt_labels)
-        else:
-            assign_result = self.assigner.assign(
-                anchors, gt_bboxes, gt_bboxes_ignore,
-                None if self.sampling else gt_labels)
-
-        sampling_result = self.sampler.sample(assign_result, anchors,
-                                              gt_bboxes)
-
-        num_valid_anchors = anchors.shape[0]
-        bbox_targets = torch.zeros_like(anchors)
-        bbox_weights = torch.zeros_like(anchors)
-        labels = anchors.new_full((num_valid_anchors, ),
-                                  self.num_classes,
-                                  dtype=torch.long)
-        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
-
-        pos_inds = sampling_result.pos_inds
-        neg_inds = sampling_result.neg_inds
-        if len(pos_inds) > 0:
-            if not self.reg_decoded_bbox:
-                pos_bbox_targets = self.bbox_coder.encode(
-                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
-            else:
-                pos_bbox_targets = sampling_result.pos_gt_bboxes
-            bbox_targets[pos_inds, :] = pos_bbox_targets
-            bbox_weights[pos_inds, :] = 1.0
-            if gt_labels is None:
-                # Only rpn gives gt_labels as None
-                # Foreground is the first class since v2.5.0
-                labels[pos_inds] = 0
-            else:
-                labels[pos_inds] = gt_labels[
-                    sampling_result.pos_assigned_gt_inds]
-            if self.train_cfg.pos_weight <= 0:
-                label_weights[pos_inds] = 1.0
-            else:
-                label_weights[pos_inds] = self.train_cfg.pos_weight
-        if len(neg_inds) > 0:
-            label_weights[neg_inds] = 1.0
-
-        # map up to original set of anchors
-        if unmap_outputs:
-            num_total_anchors = flat_anchors.size(0)
-            labels = unmap(
-                labels, num_total_anchors, inside_flags,
-                fill=self.num_classes)  # fill bg label
-            label_weights = unmap(label_weights, num_total_anchors,
-                                  inside_flags)
-            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
-            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
-
-        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
-                neg_inds, sampling_result)
-
-    def get_targets(self,
-                    anchor_list,
-                    valid_flag_list,
-                    gt_bboxes_list,
-                    img_metas,
-                    gt_bboxes_ignore_list=None,
-                    gt_labels_list=None,
-                    label_channels=1,
-                    unmap_outputs=True,
-                    return_sampling_results=False):
-        """Compute regression and classification targets for anchors in
-        multiple images.
-
-        Args:
-            anchor_list (list[list[Tensor]]): Multi level anchors of each
-                image. The outer list indicates images, and the inner list
-                corresponds to feature levels of the image. Each element of
-                the inner list is a tensor of shape (num_anchors, 5).
-            valid_flag_list (list[list[Tensor]]): Multi level valid flags of
-                each image. The outer list indicates images, and the inner list
-                corresponds to feature levels of the image. Each element of
-                the inner list is a tensor of shape (num_anchors, )
-            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
-            img_metas (list[dict]): Meta info of each image.
-            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
-                ignored.
-            gt_labels_list (list[Tensor]): Ground truth labels of each box.
-            label_channels (int): Channel of label.
-            unmap_outputs (bool): Whether to map outputs back to the original
-                set of anchors.
-
-        Returns:
-            tuple: Usually returns a tuple containing learning targets.
-
-                - labels_list (list[Tensor]): Labels of each level.
-                - label_weights_list (list[Tensor]): Label weights of each \
-                    level.
-                - bbox_targets_list (list[Tensor]): BBox targets of each level.
-                - bbox_weights_list (list[Tensor]): BBox weights of each level.
-                - num_total_pos (int): Number of positive samples in all \
-                    images.
-                - num_total_neg (int): Number of negative samples in all \
-                    images.
-
-            additional_returns: This function enables user-defined returns from
-                `self._get_targets_single`. These returns are currently refined
-                to properties at each feature map (i.e. having HxW dimension).
-                The results will be concatenated after the end
-        """
-        num_imgs = len(img_metas)
-        assert len(anchor_list) == len(valid_flag_list) == num_imgs
-
-        # anchor number of multi levels
-        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
-        # concat all level anchors to a single tensor
-        concat_anchor_list = []
-        concat_valid_flag_list = []
-        for i in range(num_imgs):
-            assert len(anchor_list[i]) == len(valid_flag_list[i])
-            concat_anchor_list.append(torch.cat(anchor_list[i]))
-            concat_valid_flag_list.append(torch.cat(valid_flag_list[i]))
-
-        # compute targets for each image
-        if gt_bboxes_ignore_list is None:
-            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
-        if gt_labels_list is None:
-            gt_labels_list = [None for _ in range(num_imgs)]
-        results = multi_apply(
-            self._get_targets_single,
-            concat_anchor_list,
-            concat_valid_flag_list,
-            gt_bboxes_list,
-            gt_bboxes_ignore_list,
-            gt_labels_list,
-            img_metas,
-            label_channels=label_channels,
-            unmap_outputs=unmap_outputs)
-        (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
-         pos_inds_list, neg_inds_list, sampling_results_list) = results[:7]
-        rest_results = list(results[7:])  # user-added return values
-        # no valid anchors
-        if any([labels is None for labels in all_labels]):
-            return None
-        # sampled anchors of all images
-        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
-        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
-        # split targets to a list w.r.t. multiple levels
-        labels_list = images_to_levels(all_labels, num_level_anchors)
-        label_weights_list = images_to_levels(all_label_weights,
-                                              num_level_anchors)
-        bbox_targets_list = images_to_levels(all_bbox_targets,
-                                             num_level_anchors)
-        bbox_weights_list = images_to_levels(all_bbox_weights,
-                                             num_level_anchors)
-        res = (labels_list, label_weights_list, bbox_targets_list,
-               bbox_weights_list, num_total_pos, num_total_neg)
-        if return_sampling_results:
-            res = res + (sampling_results_list, )
-        for i, r in enumerate(rest_results):  # user-added return values
-            rest_results[i] = images_to_levels(r, num_level_anchors)
-
-        return res + tuple(rest_results)
-
-    def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights,
-                    bbox_targets, bbox_weights, num_total_samples):
-        """Compute loss of a single scale level.
-
-        Args:
-            cls_score (torch.Tensor): Box scores for each scale level
-                Has shape (N, num_anchors * num_classes, H, W).
-            bbox_pred (torch.Tensor): Box energies / deltas for each scale
-                level with shape (N, num_anchors * 5, H, W).
-            anchors (torch.Tensor): Box reference for each scale level with
-                shape (N, num_total_anchors, 5).
-            labels (torch.Tensor): Labels of each anchors with shape
-                (N, num_total_anchors).
-            label_weights (torch.Tensor): Label weights of each anchor with
-                shape (N, num_total_anchors)
-            bbox_targets (torch.Tensor): BBox regression targets of each anchor
-            weight shape (N, num_total_anchors, 5).
-            bbox_weights (torch.Tensor): BBox regression loss weights of each
-                anchor with shape (N, num_total_anchors, 5).
-            num_total_samples (int): If sampling, num total samples equal to
-                the number of total anchors; Otherwise, it is the number of
-                positive anchors.
-
-        Returns:
-            tuple (torch.Tensor):
-
-                - loss_cls (torch.Tensor): cls. loss for each scale level.
-                - loss_bbox (torch.Tensor): reg. loss for each scale level.
-        """
-        # classification loss
-        labels = labels.reshape(-1)
-        label_weights = label_weights.reshape(-1)
-        cls_score = cls_score.permute(0, 2, 3,
-                                      1).reshape(-1, self.cls_out_channels)
-        loss_cls = self.loss_cls(
-            cls_score, labels, label_weights, avg_factor=num_total_samples)
-        # regression loss
-        bbox_targets = bbox_targets.reshape(-1, 5)
-        bbox_weights = bbox_weights.reshape(-1, 5)
-        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5)
-        if self.reg_decoded_bbox:
-            anchors = anchors.reshape(-1, 5)
-            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)
-
-        loss_bbox = self.loss_bbox(
-            bbox_pred,
-            bbox_targets,
-            bbox_weights,
-            avg_factor=num_total_samples)
-        return loss_cls, loss_bbox
-
-    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
-    def loss(self,
-             cls_scores,
-             bbox_preds,
-             gt_bboxes,
-             gt_labels,
-             img_metas,
-             gt_bboxes_ignore=None):
-        """Compute losses of the head.
-
-        Args:
-            cls_scores (list[Tensor]): Box scores for each scale level
-                Has shape (N, num_anchors * num_classes, H, W)
-            bbox_preds (list[Tensor]): Box energies / deltas for each scale
-                level with shape (N, num_anchors * 5, H, W)
-            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
-                shape (num_gts, 5) in [cx, cy, w, h, a] format.
-            gt_labels (list[Tensor]): class indices corresponding to each box
-            img_metas (list[dict]): Meta information of each image, e.g.,
-                image size, scaling factor, etc.
-            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
-                boxes can be ignored when computing the loss. Default: None
-
-        Returns:
-            dict[str, Tensor]: A dictionary of loss components.
-        """
-        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
-        assert len(featmap_sizes) == self.anchor_generator.num_levels
-
-        device = cls_scores[0].device
-
-        anchor_list, valid_flag_list = self.get_anchors(
-            featmap_sizes, img_metas, device=device)
-        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
-        cls_reg_targets = self.get_targets(
-            anchor_list,
-            valid_flag_list,
-            gt_bboxes,
-            img_metas,
-            gt_bboxes_ignore_list=gt_bboxes_ignore,
-            gt_labels_list=gt_labels,
-            label_channels=label_channels)
-        if cls_reg_targets is None:
-            return None
-        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
-         num_total_pos, num_total_neg) = cls_reg_targets
-        num_total_samples = (
-            num_total_pos + num_total_neg if self.sampling else num_total_pos)
-
-        # anchor number of multi levels
-        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
-        # concat all level anchors and flags to a single tensor
-        concat_anchor_list = []
-        for i, _ in enumerate(anchor_list):
-            concat_anchor_list.append(torch.cat(anchor_list[i]))
-        all_anchor_list = images_to_levels(concat_anchor_list,
-                                           num_level_anchors)
-
-        losses_cls, losses_bbox = multi_apply(
-            self.loss_single,
-            cls_scores,
-            bbox_preds,
-            all_anchor_list,
-            labels_list,
-            label_weights_list,
-            bbox_targets_list,
-            bbox_weights_list,
-            num_total_samples=num_total_samples)
-        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
-
-    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
-    def get_bboxes(self,
-                   cls_scores,
-                   bbox_preds,
-                   img_metas,
-                   cfg=None,
-                   rescale=False,
-                   with_nms=True):
-        """Transform network output for a batch into bbox predictions.
-
-        Args:
-            cls_scores (list[Tensor]): Box scores for each scale level
-                Has shape (N, num_anchors * num_classes, H, W)
-            bbox_preds (list[Tensor]): Box energies / deltas for each scale
-                level with shape (N, num_anchors * 5, H, W)
-            img_metas (list[dict]): Meta information of each image, e.g.,
-                image size, scaling factor, etc.
-            cfg (mmcv.Config | None): Test / postprocessing configuration,
-                if None, test_cfg would be used
-            rescale (bool): If True, return boxes in original image space.
-                Default: False.
-            with_nms (bool): If True, do nms before return boxes.
-                Default: True.
-
-        Returns:
-            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
-                The first item is an (n, 6) tensor, where the first 5 columns
-                are bounding box positions (cx, cy, w, h, a) and the
-                6-th column is a score between 0 and 1. The second item is a
-                (n,) tensor where each item is the predicted class label of the
-                corresponding box.
-
-        Example:
-            >>> import mmcv
-            >>> self = AnchorHead(
-            >>>     num_classes=9,
-            >>>     in_channels=1,
-            >>>     anchor_generator=dict(
-            >>>         type='AnchorGenerator',
-            >>>         scales=[8],
-            >>>         ratios=[0.5, 1.0, 2.0],
-            >>>         strides=[4,]))
-            >>> img_metas = [{'img_shape': (32, 32, 3), 'scale_factor': 1}]
-            >>> cfg = mmcv.Config(dict(
-            >>>     score_thr=0.00,
-            >>>     nms=dict(type='nms', iou_thr=1.0),
-            >>>     max_per_img=10))
-            >>> feat = torch.rand(1, 1, 3, 3)
-            >>> cls_score, bbox_pred = self.forward_single(feat)
-            >>> # note the input lists are over different levels, not images
-            >>> cls_scores, bbox_preds = [cls_score], [bbox_pred]
-            >>> result_list = self.get_bboxes(cls_scores, bbox_preds,
-            >>>                               img_metas, cfg)
-            >>> det_bboxes, det_labels = result_list[0]
-            >>> assert len(result_list) == 1
-            >>> assert det_bboxes.shape[1] == 5
-            >>> assert len(det_bboxes) == len(det_labels) == cfg.max_per_img
-        """
-        assert len(cls_scores) == len(bbox_preds)
-        num_levels = len(cls_scores)
-
-        device = cls_scores[0].device
-        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
-        mlvl_anchors = self.anchor_generator.grid_priors(
-            featmap_sizes, device=device)
-
-        result_list = []
-        for img_id, _ in enumerate(img_metas):
-            cls_score_list = [
-                cls_scores[i][img_id].detach() for i in range(num_levels)
-            ]
-            bbox_pred_list = [
-                bbox_preds[i][img_id].detach() for i in range(num_levels)
-            ]
-            img_shape = img_metas[img_id]['img_shape']
-            scale_factor = img_metas[img_id]['scale_factor']
-            if with_nms:
-                # some heads don't support with_nms argument
-                proposals = self._get_bboxes_single(cls_score_list,
-                                                    bbox_pred_list,
-                                                    mlvl_anchors, img_shape,
-                                                    scale_factor, cfg, rescale)
-            else:
-                proposals = self._get_bboxes_single(cls_score_list,
-                                                    bbox_pred_list,
-                                                    mlvl_anchors, img_shape,
-                                                    scale_factor, cfg, rescale,
-                                                    with_nms)
-            result_list.append(proposals)
-        return result_list
-
-    def _get_bboxes_single(self,
-                           cls_score_list,
-                           bbox_pred_list,
-                           mlvl_anchors,
-                           img_shape,
-                           scale_factor,
-                           cfg,
-                           rescale=False,
-                           with_nms=True):
-        """Transform outputs for a single batch item into bbox predictions.
-
-        Args:
-            cls_score_list (list[Tensor]): Box scores for a single scale level
-                Has shape (num_anchors * num_classes, H, W).
-            bbox_pred_list (list[Tensor]): Box energies / deltas for a single
-                scale level with shape (num_anchors * 4, H, W).
-            mlvl_anchors (list[Tensor]): Box reference for a single scale level
-                with shape (num_total_anchors, 4).
-            img_shape (tuple[int]): Shape of the input image,
-                (height, width, 3).
-            scale_factor (ndarray): Scale factor of the image arange as
-                (w_scale, h_scale, w_scale, h_scale).
-            cfg (mmcv.Config): Test / postprocessing configuration,
-                if None, test_cfg would be used.
-            rescale (bool): If True, return boxes in original image space.
-                Default: False.
-            with_nms (bool): If True, do nms before return boxes.
-                Default: True.
-
-        Returns:
-            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
-                are bounding box positions (cx, cy, w, h, a) and the
-                6-th column is a score between 0 and 1.
-        """
-        cfg = self.test_cfg if cfg is None else cfg
-        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
-        mlvl_bboxes = []
-        mlvl_scores = []
-        for cls_score, bbox_pred, anchors in zip(cls_score_list,
-                                                 bbox_pred_list, mlvl_anchors):
-            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
-            cls_score = cls_score.permute(1, 2,
-                                          0).reshape(-1, self.cls_out_channels)
-            if self.use_sigmoid_cls:
-                scores = cls_score.sigmoid()
-            else:
-                scores = cls_score.softmax(-1)
-            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
-            nms_pre = cfg.get('nms_pre', -1)
-            if nms_pre > 0 and scores.shape[0] > nms_pre:
-                # Get maximum scores for foreground classes.
-                if self.use_sigmoid_cls:
-                    max_scores, _ = scores.max(dim=1)
-                else:
-                    # remind that we set FG labels to [0, num_class-1]
-                    # since mmdet v2.0
-                    # BG cat_id: num_class
-                    max_scores, _ = scores[:, :-1].max(dim=1)
-                _, topk_inds = max_scores.topk(nms_pre)
-                anchors = anchors[topk_inds, :]
-                bbox_pred = bbox_pred[topk_inds, :]
-                scores = scores[topk_inds, :]
-            bboxes = self.bbox_coder.decode(
-                anchors, bbox_pred, max_shape=img_shape)
-            mlvl_bboxes.append(bboxes)
-            mlvl_scores.append(scores)
-        mlvl_bboxes = torch.cat(mlvl_bboxes)
-        if rescale:
-            # angle should not be rescaled
-            mlvl_bboxes[:, :4] = mlvl_bboxes[:, :4] / mlvl_bboxes.new_tensor(
-                scale_factor)
-        mlvl_scores = torch.cat(mlvl_scores)
-        if self.use_sigmoid_cls:
-            # Add a dummy background class to the backend when using sigmoid
-            # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
-            # BG cat_id: num_class
-            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
-            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
-
-        if with_nms:
-            det_bboxes, det_labels = multiclass_nms_rotated(
-                mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms,
-                cfg.max_per_img)
-            return det_bboxes, det_labels
-        else:
-            return mlvl_bboxes, mlvl_scores
-
-    def aug_test(self, feats, img_metas, rescale=False):
-        """Test det bboxes with test time augmentation, can be applied in
-        DenseHead except for ``RPNHead`` and its variants, e.g., ``GARPNHead``,
-        etc.
-
-        Args:
-            feats (list[Tensor]): the outer list indicates test-time
-                augmentations and inner Tensor should have a shape NxCxHxW,
-                which contains features for all images in the batch.
-            img_metas (list[list[dict]]): the outer list indicates test-time
-                augs (multiscale, flip, etc.) and the inner list indicates
-                images in a batch. each dict has image information.
-            rescale (bool, optional): Whether to rescale the results.
-                Defaults to False.
-        Returns:
-            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
-                The first item is ``bboxes`` with shape (n, 6),
-                where 6 represent (x, y, w, h, a, score).
-                The shape of the second tensor in the tuple is ``labels``
-                with shape (n,). The length of list should always be 1.
-        """
-        # check with_nms argument
-        gb_sig = signature(self.get_bboxes)
-        gb_args = [p.name for p in gb_sig.parameters.values()]
-        gbs_sig = signature(self._get_bboxes_single)
-        gbs_args = [p.name for p in gbs_sig.parameters.values()]
-        assert ('with_nms' in gb_args) and ('with_nms' in gbs_args), \
-            f'{self.__class__.__name__}' \
-            ' does not support test-time augmentation'
-
-        aug_bboxes = []
-        aug_scores = []
-        for x, img_meta in zip(feats, img_metas):
-            # only one image in the batch
-            outs = self.forward(x)
-            bbox_outputs = self.get_bboxes(
-                *outs,
-                img_metas=img_meta,
-                cfg=self.test_cfg,
-                rescale=False,
-                with_nms=False)[0]
-            aug_bboxes.append(bbox_outputs[0])
-            aug_scores.append(bbox_outputs[1])
-
-        # after merging, bboxes will be rescaled to the original image size
-        merged_bboxes, merged_scores = self.merge_aug_bboxes(
-            aug_bboxes, aug_scores, img_metas)
-
-        merged_scores, merged_labels = torch.max(merged_scores[:, :-1], dim=1)
-        merged_bboxes = torch.cat([merged_bboxes, merged_scores[:, None]], -1)
-        if merged_bboxes.numel() == 0:
-            return [
-                (merged_bboxes, merged_labels),
-            ]
-
-        det_bboxes, det_labels = aug_multiclass_nms_rotated(
-            merged_bboxes, merged_labels, self.test_cfg.score_thr,
-            self.test_cfg.nms, self.test_cfg.max_per_img, self.num_classes)
-
-        if rescale:
-            # angle should not be rescaled
-            merged_bboxes[:, :4] *= merged_bboxes.new_tensor(
-                img_metas[0][0]['scale_factor'])
-
-        return [
-            (det_bboxes, det_labels),
-        ]
-
-    def merge_aug_bboxes(self, aug_bboxes, aug_scores, img_metas):
-        """Merge augmented detection bboxes and scores.
-
-        Args:
-            aug_bboxes (list[Tensor]): shape (n, 4*#class)
-            aug_scores (list[Tensor] or None): shape (n, #class)
-            img_shapes (list[Tensor]): shape (3, ).
-
-        Returns:
-            tuple[Tensor]: ``bboxes`` with shape (n,4), where
-            4 represent (tl_x, tl_y, br_x, br_y)
-            and ``scores`` with shape (n,).
-        """
-        recovered_bboxes = []
-        for bboxes, img_info in zip(aug_bboxes, img_metas):
-            img_shape = img_info[0]['img_shape']
-            scale_factor = img_info[0]['scale_factor']
-            flip = img_info[0]['flip']
-            flip_direction = img_info[0]['flip_direction']
-            bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip,
-                                       flip_direction)
-            recovered_bboxes.append(bboxes)
-        bboxes = torch.cat(recovered_bboxes, dim=0)
-        if aug_scores is None:
-            return bboxes
-        else:
-            scores = torch.cat(aug_scores, dim=0)
-            return bboxes, scores
diff --git a/mmrotate/models/dense_heads/rotated_retina_head.py b/mmrotate/models/dense_heads/rotated_retina_head.py
index 2b8c49ebb..4300cb5ac 100644
--- a/mmrotate/models/dense_heads/rotated_retina_head.py
+++ b/mmrotate/models/dense_heads/rotated_retina_head.py
@@ -2,13 +2,13 @@
 import torch.nn as nn
 from mmcv.cnn import ConvModule
 from mmcv.runner import force_fp32
+from mmdet.models.dense_heads import AnchorHead
 
 from ..builder import ROTATED_HEADS
-from .rotated_anchor_head import RotatedAnchorHead
 
 
 @ROTATED_HEADS.register_module()
-class RotatedRetinaHead(RotatedAnchorHead):
+class RotatedRetinaHead(AnchorHead):
     r"""An anchor-based head used in `RotatedRetinaNet
     <https://arxiv.org/pdf/1708.02002.pdf>`_.
 

From a1dbd7d653246fcfb2ed496bb836c2cea2eb29af Mon Sep 17 00:00:00 2001
From: zytx121 <592267829@qq.com>
Date: Fri, 19 Aug 2022 17:27:24 +0800
Subject: [PATCH 3/3] add test_single_stage.py

---
 .../bbox/coder/delta_xywht_rbbox_coder.py     |   1 +
 mmrotate/testing/__init__.py                  |   6 +
 mmrotate/testing/_utils.py                    | 163 ++++++++++++++++++
 .../test_detectors/test_single_stage.py       | 114 ++++++++++++
 .../test_delta_xywht_rbbox_coder.py           |   2 +
 .../test_anchor_generator.py                  |   2 +
 6 files changed, 288 insertions(+)
 create mode 100644 mmrotate/testing/__init__.py
 create mode 100644 mmrotate/testing/_utils.py
 create mode 100644 tests/test_models/test_detectors/test_single_stage.py
 create mode 100644 tests/test_models/test_task_modules/test_coder/test_delta_xywht_rbbox_coder.py
 create mode 100644 tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py

diff --git a/mmrotate/core/bbox/coder/delta_xywht_rbbox_coder.py b/mmrotate/core/bbox/coder/delta_xywht_rbbox_coder.py
index 8b402c593..dfa9815da 100644
--- a/mmrotate/core/bbox/coder/delta_xywht_rbbox_coder.py
+++ b/mmrotate/core/bbox/coder/delta_xywht_rbbox_coder.py
@@ -232,6 +232,7 @@ def delta2bbox(rois,
         Tensor: Boxes with shape (N, num_classes * 5) or (N, 5), where 5
            represent cx, cy, w, h, a.
     """
+    rois = rois.tensor
     means = deltas.new_tensor(means).view(1, -1)
     stds = deltas.new_tensor(stds).view(1, -1)
     denorm_deltas = deltas * stds + means
diff --git a/mmrotate/testing/__init__.py b/mmrotate/testing/__init__.py
new file mode 100644
index 000000000..7168133f3
--- /dev/null
+++ b/mmrotate/testing/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from ._utils import demo_mm_inputs, get_detector_cfg
+
+__all__ = [
+    'demo_mm_inputs', 'get_detector_cfg'
+]
diff --git a/mmrotate/testing/_utils.py b/mmrotate/testing/_utils.py
new file mode 100644
index 000000000..d8f248c2e
--- /dev/null
+++ b/mmrotate/testing/_utils.py
@@ -0,0 +1,163 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+from os.path import dirname, exists, join
+
+import numpy as np
+import torch
+from mmengine.data import BaseDataElement as PixelData
+from mmengine.data import InstanceData
+from mmrotate.core.bbox.structures import RotatedBoxes
+
+from mmdet.structures import DetDataSample
+
+def _get_config_directory():
+    """Return the path of the repo's ``configs`` directory or raise."""
+    try:
+        # Assume we are running in the source mmrotate repo
+        repo_dpath = dirname(dirname(dirname(__file__)))
+    except NameError:
+        # __file__ is undefined (e.g. IPython): locate the package instead
+        import mmrotate
+        repo_dpath = dirname(dirname(mmrotate.__file__))
+    config_dpath = join(repo_dpath, 'configs')
+    if not exists(config_dpath):
+        raise Exception('Cannot find config path')
+    return config_dpath
+
+
+def _get_config_module(fname):
+    """Parse the config file ``fname`` found under the config directory.
+
+    Returns the object built by ``Config.fromfile``.
+    """
+    from mmengine import Config
+    return Config.fromfile(join(_get_config_directory(), fname))
+
+
+def get_detector_cfg(fname):
+    """Return a private copy of the ``model`` section of config ``fname``.
+
+    The section is deep-copied, so a test may edit the returned object
+    freely without influencing other tests.
+    """
+    loaded_cfg = _get_config_module(fname)
+    detector_cfg = copy.deepcopy(loaded_cfg.model)
+    return detector_cfg
+
+def _rand_bboxes(rng, num_boxes, w, h):
+    """Draw ``num_boxes`` random (cx, cy, bw, bh, t) rows; t is unscaled."""
+    cx, cy, bw, bh, t = rng.rand(num_boxes, 5).T
+    return np.vstack([cx * w, cy * h, w * bw, h * bh, t]).T
+
+def _rand_masks(rng, num_boxes, bboxes, img_w, img_h):
+    from mmdet.structures.mask import BitmapMasks
+    masks = np.zeros((num_boxes, img_h, img_w))
+    for i, bbox in enumerate(bboxes):
+        bbox = bbox.astype(np.int32)
+        noise = rng.rand(1, bbox[3] - bbox[1], bbox[2] - bbox[0])
+        mask = (noise > 0.3).astype(int)  # np.int was removed in NumPy 1.24
+        masks[i:i + 1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = mask
+    return BitmapMasks(masks, height=img_h, width=img_w)
+
+def demo_mm_inputs(batch_size=2,
+                   image_shapes=(3, 128, 128),
+                   num_items=None,
+                   num_classes=10,
+                   sem_seg_output_strides=1,
+                   with_mask=False,
+                   with_semantic=False):
+    """Create a superset of inputs needed to run test or train batches.
+
+    Args:
+        batch_size (int): batch size. Defaults to 2.
+        image_shapes (List[tuple], Optional): image shape.
+            Defaults to (3, 128, 128)
+        num_items (None | List[int]): specifies the number
+            of boxes in each batch item. Defaults to None.
+        num_classes (int): number of different labels a
+            box might have. Defaults to 10.
+        sem_seg_output_strides (int): stride by which the image height
+            and width are floor-divided to size the semantic map.
+            Defaults to 1.
+        with_mask (bool): Whether to return mask annotation.
+            Defaults to False.
+        with_semantic (bool): whether to return semantic.
+            Defaults to False.
+
+    Returns:
+        list[dict]: one dict per image holding 'inputs' and 'data_sample'.
+    """
+    rng = np.random.RandomState(0)
+
+    if isinstance(image_shapes, list):
+        assert len(image_shapes) == batch_size
+    else:
+        image_shapes = [image_shapes] * batch_size
+
+    if isinstance(num_items, list):
+        assert len(num_items) == batch_size
+
+    packed_inputs = []
+    for idx in range(batch_size):
+        image_shape = image_shapes[idx]
+        c, h, w = image_shape
+
+        image = rng.randint(0, 255, size=image_shape, dtype=np.uint8)
+
+        mm_inputs = dict()
+        mm_inputs['inputs'] = torch.from_numpy(image)
+
+        img_meta = {
+            'img_id': idx,
+            'img_shape': image_shape[1:],
+            'ori_shape': image_shape[1:],
+            'filename': '<demo>.png',
+            'scale_factor': np.array([1.1, 1.2]),
+            'flip': False,
+            'flip_direction': None,
+            'border': [1, 1, 1, 1]  # Only used by CenterNet
+        }
+
+        data_sample = DetDataSample()
+        data_sample.set_metainfo(img_meta)
+
+        # gt_instances
+        gt_instances = InstanceData()
+        if num_items is None:
+            num_boxes = rng.randint(1, 10)
+        else:
+            num_boxes = num_items[idx]
+
+        bboxes = _rand_bboxes(rng, num_boxes, w, h)
+        labels = rng.randint(1, num_classes, size=num_boxes)
+        gt_instances.bboxes = RotatedBoxes(torch.FloatTensor(bboxes))
+        gt_instances.labels = torch.LongTensor(labels)
+
+        if with_mask:
+            masks = _rand_masks(rng, num_boxes, bboxes, w, h)
+            gt_instances.masks = masks
+
+        data_sample.gt_instances = gt_instances
+
+        # ignore_instances
+        ignore_instances = InstanceData()
+        bboxes = _rand_bboxes(rng, num_boxes, w, h)
+        ignore_instances.bboxes = torch.FloatTensor(bboxes)
+        data_sample.ignored_instances = ignore_instances
+
+        # gt_sem_seg
+        if with_semantic:
+            # semantic map is 1/sem_seg_output_strides of the image size
+            gt_semantic_seg = torch.from_numpy(
+                rng.randint(
+                    0,
+                    num_classes, (1, h // sem_seg_output_strides,
+                                  w // sem_seg_output_strides),
+                    dtype=np.uint8))
+            gt_sem_seg_data = dict(sem_seg=gt_semantic_seg)
+            data_sample.gt_sem_seg = PixelData(**gt_sem_seg_data)
+
+        mm_inputs['data_sample'] = data_sample
+
+        packed_inputs.append(mm_inputs)
+    return packed_inputs
diff --git a/tests/test_models/test_detectors/test_single_stage.py b/tests/test_models/test_detectors/test_single_stage.py
new file mode 100644
index 000000000..756d80074
--- /dev/null
+++ b/tests/test_models/test_detectors/test_single_stage.py
@@ -0,0 +1,114 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import time
+import unittest
+from unittest import TestCase
+
+import torch
+from mmengine.logging import MessageHub
+from parameterized import parameterized
+
+from mmdet.structures import DetDataSample
+from mmrotate.testing import demo_mm_inputs, get_detector_cfg
+from mmrotate.utils import register_all_modules
+
+
+class TestSingleStageDetector(TestCase):
+
+    def setUp(self):
+        register_all_modules()
+
+    @parameterized.expand([
+        'rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py',
+    ])
+    def test_init(self, cfg_file):
+        model = get_detector_cfg(cfg_file)
+        model.backbone.init_cfg = None
+
+        from mmrotate.models import build_detector
+        detector = build_detector(model)
+        self.assertTrue(detector.backbone)
+        self.assertTrue(detector.neck)
+        self.assertTrue(detector.bbox_head)
+
+    @parameterized.expand([
+        ('rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py', ('cpu', 'cuda')),
+    ])
+    def test_single_stage_forward_loss_mode(self, cfg_file, devices):
+        message_hub = MessageHub.get_instance(
+            f'test_single_stage_forward_loss_mode-{time.time()}')
+        message_hub.update_info('iter', 0)
+        message_hub.update_info('epoch', 0)
+        model = get_detector_cfg(cfg_file)
+        model.backbone.init_cfg = None
+
+        from mmrotate.models import build_detector
+        assert all([device in ['cpu', 'cuda'] for device in devices])
+
+        for device in devices:
+            detector = build_detector(model)
+            detector.init_weights()
+
+            if device == 'cuda':
+                if not torch.cuda.is_available():
+                    self.skipTest('test requires GPU and torch+cuda')
+                detector = detector.cuda()
+
+            packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]])
+            batch_inputs, data_samples = detector.data_preprocessor(
+                packed_inputs, True)
+            losses = detector.forward(batch_inputs, data_samples, mode='loss')
+            self.assertIsInstance(losses, dict)
+
+    @parameterized.expand([
+        ('rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py', ('cpu', 'cuda')),
+    ])
+    def test_single_stage_forward_predict_mode(self, cfg_file, devices):
+        model = get_detector_cfg(cfg_file)
+        model.backbone.init_cfg = None
+
+        from mmrotate.models import build_detector
+        assert all([device in ['cpu', 'cuda'] for device in devices])
+
+        for device in devices:
+            detector = build_detector(model)
+
+            if device == 'cuda':
+                if not torch.cuda.is_available():
+                    self.skipTest('test requires GPU and torch+cuda')
+                detector = detector.cuda()
+
+            packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]])
+            batch_inputs, data_samples = detector.data_preprocessor(
+                packed_inputs, False)
+            # Test forward test
+            detector.eval()
+            with torch.no_grad():
+                batch_results = detector.forward(
+                    batch_inputs, data_samples, mode='predict')
+                self.assertEqual(len(batch_results), 2)
+                self.assertIsInstance(batch_results[0], DetDataSample)
+
+    @parameterized.expand([
+        ('rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py', ('cpu', 'cuda')),
+    ])
+    def test_single_stage_forward_tensor_mode(self, cfg_file, devices):
+        model = get_detector_cfg(cfg_file)
+        model.backbone.init_cfg = None
+
+        from mmrotate.models import build_detector
+        assert all([device in ['cpu', 'cuda'] for device in devices])
+
+        for device in devices:
+            detector = build_detector(model)
+
+            if device == 'cuda':
+                if not torch.cuda.is_available():
+                    self.skipTest('test requires GPU and torch+cuda')
+                detector = detector.cuda()
+
+            packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]])
+            batch_inputs, data_samples = detector.data_preprocessor(
+                packed_inputs, False)
+            batch_results = detector.forward(
+                batch_inputs, data_samples, mode='tensor')
+            self.assertIsInstance(batch_results, tuple)
diff --git a/tests/test_models/test_task_modules/test_coder/test_delta_xywht_rbbox_coder.py b/tests/test_models/test_task_modules/test_coder/test_delta_xywht_rbbox_coder.py
new file mode 100644
index 000000000..6953bfb45
--- /dev/null
+++ b/tests/test_models/test_task_modules/test_coder/test_delta_xywht_rbbox_coder.py
@@ -0,0 +1,2 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# TODO
\ No newline at end of file
diff --git a/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py b/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py
new file mode 100644
index 000000000..6953bfb45
--- /dev/null
+++ b/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py
@@ -0,0 +1,2 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# TODO
\ No newline at end of file