From 3fcf9b3f0c912b2472c25c2e4812d83df2c808d8 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 13 Oct 2022 12:40:31 +0800 Subject: [PATCH 01/52] Add DistributionAngleCoder --- ..._fcos_distribution_r50_fpn_6x_hrsc_le90.py | 68 +++++++++++++++++++ .../models/task_modules/coders/__init__.py | 5 +- .../models/task_modules/coders/angle_coder.py | 30 ++++++++ 3 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py diff --git a/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py b/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py new file mode 100644 index 000000000..e7a9d8f35 --- /dev/null +++ b/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py @@ -0,0 +1,68 @@ +_base_ = [ + '../_base_/datasets/hrsc.py', '../_base_/schedules/schedule_6x.py', + '../_base_/default_runtime.py' +] +angle_version = 'le90' + +# model settings +model = dict( + type='mmdet.FCOS', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32, + boxtype2tensor=False), + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='mmdet.FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='RotatedFCOSHead', + num_classes=1, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + center_sampling=True, + center_sample_radius=1.5, + norm_on_bbox=True, + centerness_on_reg=True, + use_hbbox_loss=False, + scale_angle=True, + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', loss_weight=1.0), + angle_coder=dict(type='DistributionAngleCoder', reg_max=16), + loss_angle=dict(type='mmdet.DistributionFocalLoss', loss_weight=0.20), + loss_centerness=dict( + type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000)) diff --git a/mmrotate/models/task_modules/coders/__init__.py b/mmrotate/models/task_modules/coders/__init__.py index 0f903e7b1..ec9f54117 100644 --- a/mmrotate/models/task_modules/coders/__init__.py +++ b/mmrotate/models/task_modules/coders/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from .angle_coder import CSLCoder, PseudoAngleCoder +from .angle_coder import CSLCoder, DistributionAngleCoder, PseudoAngleCoder from .delta_midpointoffset_rbbox_coder import MidpointOffsetCoder from .delta_xywh_hbbox_coder import DeltaXYWHHBBoxCoder from .delta_xywh_qbbox_coder import DeltaXYWHQBBoxCoder @@ -11,5 +11,6 @@ __all__ = [ 'DeltaXYWHTRBBoxCoder', 'DeltaXYWHTHBBoxCoder', 'MidpointOffsetCoder', 'GVFixCoder', 'GVRatioCoder', 'CSLCoder', 'DistanceAnglePointCoder', - 'DeltaXYWHHBBoxCoder', 'DeltaXYWHQBBoxCoder', 'PseudoAngleCoder' + 'DeltaXYWHHBBoxCoder', 'DeltaXYWHQBBoxCoder', 'PseudoAngleCoder', + 'DistributionAngleCoder' ] diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 31070180a..9f6ecbdb8 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -1,7 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. import math +import numpy as np import torch +import torch.nn.functional as F from mmdet.models.task_modules.coders.base_bbox_coder import BaseBBoxCoder from torch import Tensor @@ -139,3 +141,31 @@ def decode(self, angle_preds: Tensor, keepdim: bool = False) -> Tensor: return angle_preds else: return angle_preds.squeeze(-1) + + +@TASK_UTILS.register_module() +class DistributionAngleCoder(BaseBBoxCoder): + + def __init__(self, angle_version='le90', reg_max=16): + super().__init__() + self.angle_range = 0.5 * np.pi if angle_version == 'oc' else np.pi + self.angle_offset_dict = { + 'oc': 0, + 'le90': 0.5 * np.pi, + 'le135': 0.25 * np.pi + } + self.angle_offset = self.angle_offset_dict[angle_version] + self.reg_max = reg_max + self.encode_size = reg_max + 1 + self.project = torch.linspace(0, self.reg_max, self.reg_max + 1) + + def encode(self, angle): + # Norm to (0~1)*reg_max + dfl_target = self.reg_max * (self.angle_offset + + angle) / self.angle_range + return dfl_target.flatten() + + def decode(self, angle, keepdim=True): + angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) + angle = F.linear(angle, self.project.type_as(angle)).reshape(-1, 1) + return self.angle_range * angle / self.reg_max - self.angle_offset From e68e0d3e54d34a2637fcb88629d0a2737719f78a Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Wed, 19 Oct 2022 19:38:20 +0800 Subject: [PATCH 02/52] fix keepdim --- .../models/task_modules/coders/angle_coder.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 9f6ecbdb8..7941cd423 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -145,8 +145,14 @@ def decode(self, angle_preds: Tensor, keepdim: bool = False) -> Tensor: @TASK_UTILS.register_module() class DistributionAngleCoder(BaseBBoxCoder): + """Distribution representation for angle. - def __init__(self, angle_version='le90', reg_max=16): + Args: + angle_version (str): Angle definition. + reg_max (int): Max value of integral. Defaults to 16. 
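+
+    Example:
+        An illustrative sketch. With the default ``le90`` setting,
+        ``encode`` maps an angle to the continuous bin index
+        ``reg_max * (angle + pi/2) / pi`` and ``decode`` recovers the
+        angle as a softmax-weighted expectation over ``reg_max + 1`` bins.
+
+        >>> import torch
+        >>> coder = DistributionAngleCoder(angle_version='le90', reg_max=16)
+        >>> target = coder.encode(torch.zeros(2, 1))  # angle 0 -> bin 8.0
+        >>> target.shape
+        torch.Size([2])
+        >>> coder.decode(torch.rand(2, 17), keepdim=True).shape
+        torch.Size([2, 1])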
+ """ + + def __init__(self, angle_version: str = 'le90', reg_max: int = 16): super().__init__() self.angle_range = 0.5 * np.pi if angle_version == 'oc' else np.pi self.angle_offset_dict = { @@ -159,13 +165,17 @@ def __init__(self, angle_version='le90', reg_max=16): self.encode_size = reg_max + 1 self.project = torch.linspace(0, self.reg_max, self.reg_max + 1) - def encode(self, angle): + def encode(self, angle: Tensor) -> Tensor: # Norm to (0~1)*reg_max dfl_target = self.reg_max * (self.angle_offset + angle) / self.angle_range return dfl_target.flatten() - def decode(self, angle, keepdim=True): + def decode(self, angle: Tensor, keepdim: bool = False) -> Tensor: angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) - angle = F.linear(angle, self.project.type_as(angle)).reshape(-1, 1) + angle = F.linear(angle, self.project.type_as(angle)) + if keepdim: + angle = angle.reshape(-1, 1) + else: + angle = angle.reshape(-1) return self.angle_range * angle / self.reg_max - self.angle_offset From 2977388768c544afcc03d166e903458ca03e1478 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 1 Nov 2022 10:47:49 +0800 Subject: [PATCH 03/52] rtm init --- .../rotated_rtmdet_l-300e-hrsc.py | 208 ++++ .../rotated_rtmdet_s-300e-hrsc.py | 70 ++ .../rotated_rtmdet_tiny-300e-hrsc.py | 50 + mmrotate/models/dense_heads/__init__.py | 4 +- .../models/dense_heads/rotated_rtmdet_head.py | 910 ++++++++++++++++++ .../coders/distance_angle_point_coder.py | 16 +- 6 files changed, 1249 insertions(+), 9 deletions(-) create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py create mode 100644 mmrotate/models/dense_heads/rotated_rtmdet_head.py diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py new file mode 100644 index 000000000..e43377a27 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py @@ -0,0 +1,208 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py', + '../_base_/datasets/hrsc.py' +] +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + # type='RTMDetSepBNHead', + num_classes=1, + in_channels=256, + stacked_convs=2, + feat_channels=256, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict(type='DistanceAnglePointCoder', angle_version='le90'), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + # angle_coder=dict( + # _scope_='mmrotate', + # type='CSLCoder', + # angle_version='le90', + # omega=4, + # 
window='gaussian', + # radius=3), + # loss_angle=dict( + # _scope_='mmrotate', + # type='SmoothFocalLoss', + # gamma=2.0, + # alpha=0.25, + # loss_weight=0.8), + # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) + +train_pipeline = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +test_pipeline = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=8, + num_workers=8, + batch_sampler=None, + pin_memory=True, + dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, num_workers=1, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +max_epochs = 300 +stage2_num_epochs = 20 +base_lr = 0.004 / 4 +interval = 20 + +train_cfg = dict( + max_epochs=max_epochs, + val_interval=interval, + dynamic_intervals=[(max_epochs - stage2_num_epochs, 10)]) + +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + 
type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + interval=interval, + max_keep_ckpts=3 # only keep latest 3 checkpoints + )) +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py new file mode 100644 index 000000000..a3b52616d --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py @@ -0,0 +1,70 @@ +_base_ = './rotated_rtmdet_l-300e-hrsc.py' +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa +model = dict( + backbone=dict( + deepen_factor=0.33, + widen_factor=0.5, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), + bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), + dict( + type='RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=280, + switch_pipeline=train_pipeline_stage2) +] \ No newline at end of file diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py new file mode 100644 index 000000000..1df2d99b3 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py @@ -0,0 +1,50 @@ +_base_ = './rotated_rtmdet_s-300e-hrsc.py' + +cocop = 
'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + init_cfg=dict(type='Pretrained', checkpoint=cocop), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + # init_cfg=dict( + # type='Pretrained', prefix='backbone.', checkpoint=checkpoint) + ), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict(in_channels=96, feat_channels=96, exp_on_reg=False)) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.CachedMosaic', + img_scale=(800, 800), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/mmrotate/models/dense_heads/__init__.py b/mmrotate/models/dense_heads/__init__.py index d2ac36c40..311aa2c83 100644 --- a/mmrotate/models/dense_heads/__init__.py +++ b/mmrotate/models/dense_heads/__init__.py @@ -10,10 +10,12 @@ from .rotated_retina_head import RotatedRetinaHead from .s2a_head import S2AHead, S2ARefineHead from .sam_reppoints_head import SAMRepPointsHead +from .rotated_rtmdet_head import RotatedRTMDetHead, RotatedRTMDetSepBNHead __all__ = [ 'RotatedRetinaHead', 'OrientedRPNHead', 'RotatedRepPointsHead', 'SAMRepPointsHead', 'AngleBranchRetinaHead', 'RotatedATSSHead', 'RotatedFCOSHead', 'OrientedRepPointsHead', 'R3Head', 'R3RefineHead', - 'S2AHead', 'S2ARefineHead', 'CFAHead' + 'S2AHead', 'S2ARefineHead', 'CFAHead', 'RotatedRTMDetHead', + 'RotatedRTMDetSepBNHead' ] diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py b/mmrotate/models/dense_heads/rotated_rtmdet_head.py new file mode 100644 index 000000000..e0f1fbdde --- /dev/null +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -0,0 +1,910 @@ +import copy +from typing import Tuple, List, Optional + +import torch +from mmcv.cnn import Scale, ConvModule, is_norm +from mmdet.models import inverse_sigmoid +from mmdet.models.dense_heads import RTMDetHead +from mmdet.models.task_modules import anchor_inside_flags +from mmdet.models.utils import sigmoid_geometric_mean, multi_apply, unmap, select_single_mlvl, filter_scores_and_topk +from mmdet.structures.bbox import distance2bbox, get_box_tensor, cat_boxes, bbox_cxcywh_to_xyxy +from mmdet.utils import ConfigType, OptConfigType, InstanceList, OptInstanceList, reduce_mean +from mmengine import ConfigDict +from mmengine.model import normal_init, constant_init, bias_init_with_prob +from mmengine.structures import InstanceData +from mmrotate.registry import MODELS, TASK_UTILS + +from mmrotate.models.task_modules.coders import 
DistanceAnglePointCoder +from mmrotate.structures import norm_angle, RotatedBoxes +from torch import nn, Tensor + + +# TODO move to mmrotate.structures.transform and update Coder +def distance2obb(points, + distance, + angle_version='oc'): + distance, angle = distance.split([4, 1], dim=-1) + + cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) + + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) + + wh = distance[..., :2] + distance[..., 2:] + offset_t = (distance[..., 2:] - distance[..., :2]) / 2 + offset_t = offset_t.unsqueeze(-1) + offset = torch.matmul(rot_matrix, offset_t).squeeze(-1) + ctr = points[..., :2] + offset + + angle_regular = norm_angle(angle, angle_version) + return torch.cat([ctr, wh, angle_regular], dim=-1) + + +@MODELS.register_module() +class RotatedRTMDetHead(RTMDetHead): + """Detection Head of Rotated RTMDet. + + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + with_objectness (bool): Whether to add an objectness branch. + Defaults to True. + act_cfg (:obj:`ConfigDict` or dict): Config dict for activation layer. + Default: dict(type='ReLU') + """ + + def __init__(self, + num_classes: int, + in_channels: int, + use_hbbox_loss: bool = False, + scale_angle: bool = True, + angle_coder: ConfigType = dict(type='PseudoAngleCoder'), + loss_angle: OptConfigType = None, + **kwargs) -> None: + self.use_hbbox_loss = use_hbbox_loss + self.is_scale_angle = scale_angle + self.angle_coder = TASK_UTILS.build(angle_coder) + super().__init__( + num_classes, + in_channels, + # useless, but error + loss_centerness=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0), + **kwargs) + if loss_angle is not None: + self.loss_angle = MODELS.build(loss_angle) + else: + self.loss_angle = None + + def _init_layers(self): + """Initialize layers of the head.""" + super()._init_layers() + pred_pad_size = self.pred_kernel_size // 2 + self.rtm_ang = nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.angle_coder.encode_size, + self.pred_kernel_size, + padding=pred_pad_size) + if self.is_scale_angle: + self.scale_angle = Scale(1.0) + + def init_weights(self) -> None: + """Initialize weights of the head.""" + super().init_weights() + normal_init(self.rtm_ang, std=0.01) + + def forward(self, feats: Tuple[Tensor, ...]) -> tuple: + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: Usually a tuple of classification scores and bbox prediction + - cls_scores (list[Tensor]): Classification scores for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * num_classes. + - bbox_preds (list[Tensor]): Box energies / deltas for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * 4. + - angle_preds (list[Tensor]): Angle prediction for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * angle_dim. 
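+              Here angle_dim equals the ``encode_size`` of the configured
+              angle coder (``reg_max + 1`` for ``DistributionAngleCoder``).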
+ """ + + cls_scores = [] + bbox_preds = [] + angle_preds = [] + for idx, (x, scale, stride) in enumerate( + zip(feats, self.scales, self.prior_generator.strides)): + cls_feat = x + reg_feat = x + + for cls_layer in self.cls_convs: + cls_feat = cls_layer(cls_feat) + cls_score = self.rtm_cls(cls_feat) + + for reg_layer in self.reg_convs: + reg_feat = reg_layer(reg_feat) + + if self.with_objectness: + objectness = self.rtm_obj(reg_feat) + cls_score = inverse_sigmoid( + sigmoid_geometric_mean(cls_score, objectness)) + + reg_dist = scale(self.rtm_reg(reg_feat).exp()).float() * stride[0] + if self.is_scale_angle: + angle_pred = self.scale_angle(self.rtm_ang(reg_feat)).float() + else: + angle_pred = self.rtm_ang(reg_feat).float() + + cls_scores.append(cls_score) + bbox_preds.append(reg_dist) + angle_preds.append(angle_pred) + return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) + + def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: Tensor, + labels: Tensor, label_weights: Tensor, + bbox_targets: Tensor, assign_metrics: Tensor, + stride: List[int]): + """Compute loss of a single scale level. + + Args: + cls_score (Tensor): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W). + bbox_pred (Tensor): Decoded bboxes for each scale + level with shape (N, num_anchors * 4, H, W). + labels (Tensor): Labels of each anchors with shape + (N, num_total_anchors). + label_weights (Tensor): Label weights of each anchor with shape + (N, num_total_anchors). + bbox_targets (Tensor): BBox regression targets of each anchor with + shape (N, num_total_anchors, 4). + assign_metrics (Tensor): Assign metrics with shape + (N, num_total_anchors). + stride (List[int]): Downsample stride of the feature map. + + Returns: + dict[str, Tensor]: A dictionary of loss components. + """ + assert stride[0] == stride[1], 'h stride is not equal to w stride!' 
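+        # Flatten predictions over the whole batch: classification logits
+        # become (num_total_priors, num_classes); bbox_pred arrives already
+        # decoded from loss_by_feat, with 4 values per prior for the hbox
+        # loss and 5 values (x, y, w, h, t) otherwise.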
+ cls_score = cls_score.permute(0, 2, 3, 1).reshape( + -1, self.cls_out_channels).contiguous() + + if self.use_hbbox_loss: + bbox_pred = bbox_pred.reshape(-1, 4) + else: + bbox_pred = bbox_pred.reshape(-1, 5) + bbox_targets = bbox_targets.reshape(-1, 5) + + labels = labels.reshape(-1) + assign_metrics = assign_metrics.reshape(-1) + label_weights = label_weights.reshape(-1) + targets = (labels, assign_metrics) + + loss_cls = self.loss_cls( + cls_score, targets, label_weights, avg_factor=1.0) + + # FG cat_id: [0, num_classes -1], BG cat_id: num_classes + bg_class_ind = self.num_classes + pos_inds = ((labels >= 0) + & (labels < bg_class_ind)).nonzero().squeeze(1) + + if len(pos_inds) > 0: + pos_bbox_targets = bbox_targets[pos_inds] + pos_bbox_pred = bbox_pred[pos_inds] + + pos_decode_bbox_pred = pos_bbox_pred + pos_decode_bbox_targets = pos_bbox_targets + if self.use_hbbox_loss: + pos_decode_bbox_targets = bbox_cxcywh_to_xyxy(pos_bbox_targets[:, :4]) + + # regression loss + pos_bbox_weight = assign_metrics[pos_inds] + + loss_angle = angle_pred.sum() * 0 + if self.loss_angle is not None: + angle_pred = angle_pred.reshape(-1, self.angle_coder.encode_size) + pos_angle_pred = angle_pred[pos_inds] + pos_angle_target = pos_bbox_targets[:, 4:5] + pos_angle_target = self.angle_coder.encode(pos_angle_target) + loss_angle = self.loss_angle( + pos_angle_pred, + pos_angle_target, + weight=pos_bbox_weight, + avg_factor=1.0) + + loss_bbox = self.loss_bbox( + pos_decode_bbox_pred, + pos_decode_bbox_targets, + weight=pos_bbox_weight, + avg_factor=1.0) + + else: + loss_bbox = bbox_pred.sum() * 0 + pos_bbox_weight = bbox_targets.new_tensor(0.) + loss_angle = angle_pred.sum() * 0 + + return loss_cls, loss_bbox, loss_angle, assign_metrics.sum(), pos_bbox_weight.sum(), pos_bbox_weight.sum() + + def loss_by_feat(self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + angle_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + batch_gt_instances_ignore: OptInstanceList = None): + """Compute losses of the head. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W) + bbox_preds (list[Tensor]): Decoded box for each scale + level with shape (N, num_anchors * 4, H, W) in + [tl_x, tl_y, br_x, br_y] format. + batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``bboxes`` and ``labels`` + attributes. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional): + Batch of gt_instances_ignore. It includes ``bboxes`` attribute + data that is ignored during training and testing. + Defaults to None. + + Returns: + dict[str, Tensor]: A dictionary of loss components. 
+ """ + num_imgs = len(batch_img_metas) + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + assert len(featmap_sizes) == self.prior_generator.num_levels + + device = cls_scores[0].device + anchor_list, valid_flag_list = self.get_anchors( + featmap_sizes, batch_img_metas, device=device) + flatten_cls_scores = torch.cat([ + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.cls_out_channels) + for cls_score in cls_scores + ], 1) + + decoded_bboxes = [] + decoded_hbboxes = [] + angle_preds_list = [] + for anchor, bbox_pred, angle_pred in zip(anchor_list[0], bbox_preds, angle_preds): + anchor = anchor.reshape(-1, 4) + bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + angle_pred = angle_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.angle_coder.encode_size) + + if self.use_hbbox_loss: + hbbox_pred = distance2bbox(anchor, bbox_pred) + decoded_hbboxes.append(hbbox_pred) + + decoded_angle = self.angle_coder.decode(angle_pred, keepdim=True) + bbox_pred = torch.cat([bbox_pred, decoded_angle], dim=-1) + + # res = distance2obb(anchor, bbox_pred, 'le90').reshape(-1, 5) + # + # c = DistanceAnglePointCoder(angle_version='le90') + # res2 = c.decode(anchor.repeat(8, 1)[:, :2], bbox_pred.reshape(-1, 5)) + # + # print((res == res2).unique()) + + # TODO add arg angle_version + bbox_pred = distance2obb(anchor, bbox_pred, angle_version='le90') + decoded_bboxes.append(bbox_pred) + angle_preds_list.append(angle_pred) + + # flatten_bboxes is rbox, for target assign + flatten_bboxes = torch.cat(decoded_bboxes, 1) + + cls_reg_targets = self.get_targets( + flatten_cls_scores, + flatten_bboxes, + anchor_list, + valid_flag_list, + batch_gt_instances, + batch_img_metas, + batch_gt_instances_ignore=batch_gt_instances_ignore) + (anchor_list, labels_list, label_weights_list, bbox_targets_list, + assign_metrics_list) = cls_reg_targets + + if self.use_hbbox_loss: + decoded_bboxes = decoded_hbboxes + + losses_cls, losses_bbox, losses_angle, \ + cls_avg_factors, bbox_avg_factors, angle_avg_factors = multi_apply( + self.loss_by_feat_single, + cls_scores, + decoded_bboxes, + angle_preds_list, + labels_list, + label_weights_list, + bbox_targets_list, + assign_metrics_list, + self.prior_generator.strides) + + cls_avg_factor = reduce_mean(sum(cls_avg_factors)).clamp_(min=1).item() + losses_cls = list(map(lambda x: x / cls_avg_factor, losses_cls)) + + bbox_avg_factor = reduce_mean( + sum(bbox_avg_factors)).clamp_(min=1).item() + losses_bbox = list(map(lambda x: x / bbox_avg_factor, losses_bbox)) + if self.loss_angle is not None: + angle_avg_factors = reduce_mean( + sum(angle_avg_factors)).clamp_(min=1).item() + losses_angle = list(map(lambda x: x / angle_avg_factors, losses_angle)) + return dict(loss_cls=losses_cls, loss_bbox=losses_bbox, loss_angle=losses_angle) + else: + return dict(loss_cls=losses_cls, loss_bbox=losses_bbox) + + def _get_targets_single(self, + cls_scores: Tensor, + bbox_preds: Tensor, + flat_anchors: Tensor, + valid_flags: Tensor, + gt_instances: InstanceData, + img_meta: dict, + gt_instances_ignore: Optional[InstanceData] = None, + unmap_outputs=True): + """Compute regression, classification targets for anchors in a single + image. + + Args: + cls_scores (list(Tensor)): Box scores for each image. + bbox_preds (list(Tensor)): Box energies / deltas for each image. 
+ flat_anchors (Tensor): Multi-level anchors of the image, which are + concatenated into a single tensor of shape (num_anchors ,4) + valid_flags (Tensor): Multi level valid flags of the image, + which are concatenated into a single tensor of + shape (num_anchors,). + gt_instances (:obj:`InstanceData`): Ground truth of instance + annotations. It usually includes ``bboxes`` and ``labels`` + attributes. + img_meta (dict): Meta information for current image. + gt_instances_ignore (:obj:`InstanceData`, optional): Instances + to be ignored during training. It includes ``bboxes`` attribute + data that is ignored during training and testing. + Defaults to None. + unmap_outputs (bool): Whether to map outputs back to the original + set of anchors. Defaults to True. + + Returns: + tuple: N is the number of total anchors in the image. + + - anchors (Tensor): All anchors in the image with shape (N, 4). + - labels (Tensor): Labels of all anchors in the image with shape + (N,). + - label_weights (Tensor): Label weights of all anchor in the + image with shape (N,). + - bbox_targets (Tensor): BBox targets of all anchors in the + image with shape (N, 4). + - norm_alignment_metrics (Tensor): Normalized alignment metrics + of all priors in the image with shape (N,). + """ + inside_flags = anchor_inside_flags(flat_anchors, valid_flags, + img_meta['img_shape'][:2], + self.train_cfg['allowed_border']) + if not inside_flags.any(): + return (None,) * 7 + # assign gt and sample anchors + anchors = flat_anchors[inside_flags, :] + + pred_instances = InstanceData( + scores=cls_scores[inside_flags, :], + bboxes=bbox_preds[inside_flags, :], + priors=anchors) + + assign_result = self.assigner.assign(pred_instances, gt_instances, + gt_instances_ignore) + + sampling_result = self.sampler.sample(assign_result, pred_instances, + gt_instances) + + num_valid_anchors = anchors.shape[0] + bbox_targets = anchors.new_zeros((*anchors.size()[:-1], 5)) + labels = anchors.new_full((num_valid_anchors,), + self.num_classes, + dtype=torch.long) + label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) + assign_metrics = anchors.new_zeros( + num_valid_anchors, dtype=torch.float) + + pos_inds = sampling_result.pos_inds + neg_inds = sampling_result.neg_inds + if len(pos_inds) > 0: + # point-based + pos_bbox_targets = sampling_result.pos_gt_bboxes + # TODO add arg angle_version + pos_bbox_targets = pos_bbox_targets.regularize_boxes('le90') + bbox_targets[pos_inds, :] = pos_bbox_targets + + labels[pos_inds] = sampling_result.pos_gt_labels + if self.train_cfg['pos_weight'] <= 0: + label_weights[pos_inds] = 1.0 + else: + label_weights[pos_inds] = self.train_cfg['pos_weight'] + if len(neg_inds) > 0: + label_weights[neg_inds] = 1.0 + + class_assigned_gt_inds = torch.unique( + sampling_result.pos_assigned_gt_inds) + for gt_inds in class_assigned_gt_inds: + gt_class_inds = pos_inds[sampling_result.pos_assigned_gt_inds == + gt_inds] + assign_metrics[gt_class_inds] = assign_result.max_overlaps[ + gt_class_inds] + + # map up to original set of anchors + if unmap_outputs: + num_total_anchors = flat_anchors.size(0) + anchors = unmap(anchors, num_total_anchors, inside_flags) + labels = unmap( + labels, num_total_anchors, inside_flags, fill=self.num_classes) + label_weights = unmap(label_weights, num_total_anchors, + inside_flags) + bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) + assign_metrics = unmap(assign_metrics, num_total_anchors, + inside_flags) + return (anchors, labels, label_weights, bbox_targets, 
assign_metrics) + + def predict_by_feat(self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + angle_preds: List[Tensor], + score_factors: Optional[List[Tensor]] = None, + batch_img_metas: Optional[List[dict]] = None, + cfg: Optional[ConfigDict] = None, + rescale: bool = False, + with_nms: bool = True) -> InstanceList: + """Transform a batch of output features extracted from the head into + bbox results. + Note: When score_factors is not None, the cls_scores are + usually multiplied by it then obtain the real score used in NMS, + such as CenterNess in FCOS, IoU branch in ATSS. + Args: + cls_scores (list[Tensor]): Classification scores for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * num_classes, H, W). + bbox_preds (list[Tensor]): Box energies / deltas for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * 4, H, W). + angle_preds (list[Tensor]): Box angle for each scale level + with shape (N, num_points * encode_size, H, W) + score_factors (list[Tensor], optional): Score factor for + all scale level, each is a 4D-tensor, has shape + (batch_size, num_priors * 1, H, W). Defaults to None. + batch_img_metas (list[dict], Optional): Batch image meta info. + Defaults to None. + cfg (ConfigDict, optional): Test / postprocessing + configuration, if None, test_cfg would be used. + Defaults to None. + rescale (bool): If True, return boxes in original image space. + Defaults to False. + with_nms (bool): If True, do nms before return boxes. + Defaults to True. + Returns: + list[:obj:`InstanceData`]: Object detection results of each image + after the post process. Each item usually contains following keys. + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes (Tensor): Has a shape (num_instances, 5), + the last dimension 5 arrange as (x, y, w, h, t). + """ + assert len(cls_scores) == len(bbox_preds) + + if score_factors is None: + # e.g. Retina, FreeAnchor, Foveabox, etc. + with_score_factors = False + else: + # e.g. FCOS, PAA, ATSS, AutoAssign, etc. 
+ with_score_factors = True + assert len(cls_scores) == len(score_factors) + + num_levels = len(cls_scores) + + featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)] + mlvl_priors = self.prior_generator.grid_priors( + featmap_sizes, + dtype=cls_scores[0].dtype, + device=cls_scores[0].device) + + result_list = [] + + for img_id in range(len(batch_img_metas)): + img_meta = batch_img_metas[img_id] + cls_score_list = select_single_mlvl( + cls_scores, img_id, detach=True) + bbox_pred_list = select_single_mlvl( + bbox_preds, img_id, detach=True) + angle_pred_list = select_single_mlvl( + angle_preds, img_id, detach=True) + if with_score_factors: + score_factor_list = select_single_mlvl( + score_factors, img_id, detach=True) + else: + score_factor_list = [None for _ in range(num_levels)] + + results = self._predict_by_feat_single( + cls_score_list=cls_score_list, + bbox_pred_list=bbox_pred_list, + angle_pred_list=angle_pred_list, + score_factor_list=score_factor_list, + mlvl_priors=mlvl_priors, + img_meta=img_meta, + cfg=cfg, + rescale=rescale, + with_nms=with_nms) + result_list.append(results) + return result_list + + def _predict_by_feat_single(self, + cls_score_list: List[Tensor], + bbox_pred_list: List[Tensor], + angle_pred_list: List[Tensor], + score_factor_list: List[Tensor], + mlvl_priors: List[Tensor], + img_meta: dict, + cfg: ConfigDict, + rescale: bool = False, + with_nms: bool = True) -> InstanceData: + """Transform a single image's features extracted from the head into + bbox results. + Args: + cls_score_list (list[Tensor]): Box scores from all scale + levels of a single image, each item has shape + (num_priors * num_classes, H, W). + bbox_pred_list (list[Tensor]): Box energies / deltas from + all scale levels of a single image, each item has shape + (num_priors * 4, H, W). + angle_pred_list (list[Tensor]): Box angle for a single scale + level with shape (N, num_points * encode_size, H, W). + score_factor_list (list[Tensor]): Score factor from all scale + levels of a single image, each item has shape + (num_priors * 1, H, W). + mlvl_priors (list[Tensor]): Each element in the list is + the priors of a single level in feature pyramid. In all + anchor-based methods, it has shape (num_priors, 4). In + all anchor-free methods, it has shape (num_priors, 2) + when `with_stride=True`, otherwise it still has shape + (num_priors, 4). + img_meta (dict): Image meta info. + cfg (mmengine.Config): Test / postprocessing configuration, + if None, test_cfg would be used. + rescale (bool): If True, return boxes in original image space. + Defaults to False. + with_nms (bool): If True, do nms before return boxes. + Defaults to True. + Returns: + :obj:`InstanceData`: Detection results of each image + after the post process. + Each item usually contains following keys. + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes (Tensor): Has a shape (num_instances, 5), + the last dimension 5 arrange as (x, y, w, h, t). + """ + if score_factor_list[0] is None: + # e.g. Retina, FreeAnchor, etc. + with_score_factors = False + else: + # e.g. FCOS, PAA, ATSS, etc. 
+ with_score_factors = True + + cfg = self.test_cfg if cfg is None else cfg + cfg = copy.deepcopy(cfg) + img_shape = img_meta['img_shape'] + nms_pre = cfg.get('nms_pre', -1) + + mlvl_bbox_preds = [] + mlvl_valid_priors = [] + mlvl_scores = [] + mlvl_labels = [] + if with_score_factors: + mlvl_score_factors = [] + else: + mlvl_score_factors = None + for level_idx, ( + cls_score, bbox_pred, angle_pred, score_factor, priors) in \ + enumerate(zip(cls_score_list, bbox_pred_list, angle_pred_list, + score_factor_list, mlvl_priors)): + + assert cls_score.size()[-2:] == bbox_pred.size()[-2:] + + # dim = self.bbox_coder.encode_size + bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4) + angle_pred = angle_pred.permute(1, 2, 0).reshape( + -1, self.angle_coder.encode_size) + if with_score_factors: + score_factor = score_factor.permute(1, 2, + 0).reshape(-1).sigmoid() + cls_score = cls_score.permute(1, 2, + 0).reshape(-1, self.cls_out_channels) + if self.use_sigmoid_cls: + scores = cls_score.sigmoid() + else: + # remind that we set FG labels to [0, num_class-1] + # since mmdet v2.0 + # BG cat_id: num_class + scores = cls_score.softmax(-1)[:, :-1] + + # After https://github.com/open-mmlab/mmdetection/pull/6268/, + # this operation keeps fewer bboxes under the same `nms_pre`. + # There is no difference in performance for most models. If you + # find a slight drop in performance, you can set a larger + # `nms_pre` than before. + score_thr = cfg.get('score_thr', 0) + + results = filter_scores_and_topk( + scores, score_thr, nms_pre, + dict( + bbox_pred=bbox_pred, angle_pred=angle_pred, priors=priors)) + scores, labels, keep_idxs, filtered_results = results + + bbox_pred = filtered_results['bbox_pred'] + angle_pred = filtered_results['angle_pred'] + priors = filtered_results['priors'] + + decoded_angle = self.angle_coder.decode(angle_pred, keepdim=True) + bbox_pred = torch.cat([bbox_pred, decoded_angle], dim=-1) + + if with_score_factors: + score_factor = score_factor[keep_idxs] + + mlvl_bbox_preds.append(bbox_pred) + mlvl_valid_priors.append(priors) + mlvl_scores.append(scores) + mlvl_labels.append(labels) + + if with_score_factors: + mlvl_score_factors.append(score_factor) + + bbox_pred = torch.cat(mlvl_bbox_preds) + priors = cat_boxes(mlvl_valid_priors) + bboxes = self.bbox_coder.decode(priors, bbox_pred, max_shape=img_shape) + + results = InstanceData() + results.bboxes = RotatedBoxes(bboxes) + results.scores = torch.cat(mlvl_scores) + results.labels = torch.cat(mlvl_labels) + if with_score_factors: + results.score_factors = torch.cat(mlvl_score_factors) + + return self._bbox_post_process( + results=results, + cfg=cfg, + rescale=rescale, + with_nms=with_nms, + img_meta=img_meta) + + +@MODELS.register_module() +class RotatedRTMDetSepBNHead(RotatedRTMDetHead): + """Rotated RTMDetHead with separated BN layers and shared conv layers. + + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + share_conv (bool): Whether to share conv layers between stages. + Defaults to True. + norm_cfg (:obj:`ConfigDict` or dict)): Config dict for normalization + layer. Defaults to dict(type='BN', momentum=0.03, eps=0.001). + act_cfg (:obj:`ConfigDict` or dict)): Config dict for activation layer. + Defaults to dict(type='SiLU'). + pred_kernel_size (int): Kernel size of prediction layer. Defaults to 1. 
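+        exp_on_reg (bool): Whether to apply ``exp`` to the regression output
+            before scaling by the feature-map stride. Defaults to False.
+        scale_angle (bool): Must be False; a learnable scale on the angle
+            prediction is not supported by this head. Defaults to False.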
+ """ + + def __init__(self, + num_classes: int, + in_channels: int, + share_conv: bool = True, + scale_angle: bool = False, + norm_cfg: ConfigType = dict( + type='BN', momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type='SiLU'), + pred_kernel_size: int = 1, + exp_on_reg=False, + **kwargs) -> None: + self.share_conv = share_conv + self.exp_on_reg = exp_on_reg + assert scale_angle == False, 'scale_angle does not support in RotatedRTMDetSepBNHead' + super().__init__( + num_classes, + in_channels, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + pred_kernel_size=pred_kernel_size, + scale_angle=False, + **kwargs) + + def _init_layers(self) -> None: + """Initialize layers of the head.""" + self.cls_convs = nn.ModuleList() + self.reg_convs = nn.ModuleList() + + self.rtm_cls = nn.ModuleList() + self.rtm_reg = nn.ModuleList() + self.rtm_ang = nn.ModuleList() + if self.with_objectness: + self.rtm_obj = nn.ModuleList() + for n in range(len(self.prior_generator.strides)): + cls_convs = nn.ModuleList() + reg_convs = nn.ModuleList() + for i in range(self.stacked_convs): + chn = self.in_channels if i == 0 else self.feat_channels + cls_convs.append( + ConvModule( + chn, + self.feat_channels, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + reg_convs.append( + ConvModule( + chn, + self.feat_channels, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.cls_convs.append(cls_convs) + self.reg_convs.append(reg_convs) + + self.rtm_cls.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.cls_out_channels, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + self.rtm_reg.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * 4, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + self.rtm_ang.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.angle_coder.encode_size, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + if self.with_objectness: + self.rtm_obj.append( + nn.Conv2d( + self.feat_channels, + 1, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + + if self.share_conv: + for n in range(len(self.prior_generator.strides)): + for i in range(self.stacked_convs): + self.cls_convs[n][i].conv = self.cls_convs[0][i].conv + self.reg_convs[n][i].conv = self.reg_convs[0][i].conv + + def init_weights(self) -> None: + """Initialize weights of the head.""" + for m in self.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, mean=0, std=0.01) + if is_norm(m): + constant_init(m, 1) + bias_cls = bias_init_with_prob(0.01) + for rtm_cls, rtm_reg, rtm_ang in zip(self.rtm_cls, self.rtm_reg, self.rtm_ang): + normal_init(rtm_cls, std=0.01, bias=bias_cls) + normal_init(rtm_reg, std=0.01) + normal_init(rtm_ang, std=0.01) + if self.with_objectness: + for rtm_obj in self.rtm_obj: + normal_init(rtm_obj, std=0.01, bias=bias_cls) + + def forward(self, feats: Tuple[Tensor, ...]) -> tuple: + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: Usually a tuple of classification scores and bbox prediction + + - cls_scores (tuple[Tensor]): Classification scores for all scale + levels, each is a 4D-tensor, the channels number is + num_anchors * num_classes. + - bbox_preds (tuple[Tensor]): Box energies / deltas for all scale + levels, each is a 4D-tensor, the channels number is + num_anchors * 4. 
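+            - angle_preds (tuple[Tensor]): Angle predictions for all scale
+              levels, each is a 4D-tensor, the channels number is
+              num_anchors * angle_dim, where angle_dim is the
+              ``encode_size`` of the angle coder.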
+ """ + + cls_scores = [] + bbox_preds = [] + angle_preds = [] + for idx, (x, stride) in enumerate( + zip(feats, self.prior_generator.strides)): + cls_feat = x + reg_feat = x + + for cls_layer in self.cls_convs[idx]: + cls_feat = cls_layer(cls_feat) + cls_score = self.rtm_cls[idx](cls_feat) + + for reg_layer in self.reg_convs[idx]: + reg_feat = reg_layer(reg_feat) + + if self.with_objectness: + objectness = self.rtm_obj[idx](reg_feat) + cls_score = inverse_sigmoid( + sigmoid_geometric_mean(cls_score, objectness)) + if self.exp_on_reg: + reg_dist = self.rtm_reg[idx](reg_feat).exp() * stride[0] + else: + reg_dist = self.rtm_reg[idx](reg_feat) * stride[0] + + angle_pred = self.rtm_ang[idx](reg_feat) + + cls_scores.append(cls_score) + bbox_preds.append(reg_dist) + angle_preds.append(angle_pred) + return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) + + +if __name__ == '__main__': + points = torch.tensor([[0., 0., 8., 8.], + [8., 0., 8., 8.], + [16., 0., 8., 8.], + [24., 0., 8., 8.]], device='cuda:0') + + distances = torch.tensor( + [[[7.4215, 7.8629, 7.4568, 8.1447, -0.0224], + [7.3209, 7.7807, 7.4076, 8.1743, -0.0194], + [7.2929, 7.7480, 7.3624, 8.1829, -0.0228], + [7.3291, 7.7770, 7.3966, 8.2230, -0.0215]], + [[7.5713, 7.8529, 7.5189, 8.1220, -0.0212], + [7.6215, 7.8978, 7.5875, 8.2110, -0.0304], + [7.6464, 7.8849, 7.4770, 8.0470, -0.0372], + [7.5255, 7.8110, 7.4478, 8.1207, -0.0282]], + [[7.4363, 7.8605, 7.4451, 8.1569, -0.0239], + [7.3455, 7.7839, 7.4027, 8.1784, -0.0184], + [7.3022, 7.7584, 7.3663, 8.1751, -0.0232], + [7.3419, 7.7717, 7.3903, 8.2077, -0.0242]], + [[7.4416, 7.8393, 7.4837, 8.1546, -0.0213], + [7.3217, 7.7790, 7.3956, 8.1805, -0.0154], + [7.3080, 7.7377, 7.3658, 8.1779, -0.0205], + [7.3390, 7.7580, 7.4031, 8.2245, -0.0205]], + [[7.7741, 7.8088, 7.7546, 8.3303, -0.0151], + [7.7897, 7.9653, 7.9556, 8.3727, -0.0266], + [8.2531, 8.2622, 8.4759, 8.1064, -0.0506], + [8.1185, 7.9733, 8.1983, 8.3481, -0.0243]], + [[7.4850, 7.8193, 7.4702, 8.1592, -0.0248], + [7.3636, 7.7511, 7.4078, 8.1570, -0.0204], + [7.3184, 7.7122, 7.3942, 8.1611, -0.0243], + [7.3617, 7.7788, 7.4001, 8.2358, -0.0256]], + [[7.4395, 7.8696, 7.4530, 8.1517, -0.0246], + [7.3550, 7.7790, 7.4124, 8.1524, -0.0221], + [7.3377, 7.7614, 7.3752, 8.1712, -0.0226], + [7.3411, 7.7603, 7.4000, 8.2265, -0.0246]], + [[7.4344, 7.8780, 7.4388, 8.1534, -0.0247], + [7.3309, 7.8050, 7.3925, 8.1647, -0.0202], + [7.2904, 7.7461, 7.3651, 8.1827, -0.0221], + [7.3486, 7.7741, 7.4090, 8.2144, -0.0230]]], device='cuda:0') + + res = distance2obb(points, distances, 'le90').reshape(-1, 5) + + c = DistanceAnglePointCoder(angle_version='le90') + res2 = c.decode(points.repeat(8, 1)[:, :2], distances.reshape(-1, 5)) + + print((res == res2).unique()) diff --git a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py index d456d7202..0b7d3f9ec 100644 --- a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py +++ b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py @@ -95,17 +95,17 @@ def distance2obb(self, distance, max_shape=None, angle_version='oc'): - distance, angle = distance.split([4, 1], dim=1) + distance, angle = distance.split([4, 1], dim=-1) cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], - dim=1).reshape(-1, 2, 2) - wh = distance[:, :2] + distance[:, 2:] - offset_t = (distance[:, 2:] - distance[:, :2]) / 2 - offset_t = offset_t.unsqueeze(2) 
- offset = torch.bmm(rot_matrix, offset_t).squeeze(2) - ctr = points + offset + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) + + wh = distance[..., :2] + distance[..., 2:] + offset_t = (distance[..., 2:] - distance[..., :2]) / 2 + offset = torch.matmul(rot_matrix, offset_t[..., None]).squeeze(-1) + ctr = points[..., :2] + offset angle_regular = norm_angle(angle, angle_version) return torch.cat([ctr, wh, angle_regular], dim=-1) From dbc71d49d54bde6363a3980c1ab678ea71d22572 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 8 Nov 2022 18:07:30 +0800 Subject: [PATCH 04/52] fix distribution angle coder --- mmrotate/models/task_modules/coders/angle_coder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 7941cd423..8db58fa6a 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -172,10 +172,10 @@ def encode(self, angle: Tensor) -> Tensor: return dfl_target.flatten() def decode(self, angle: Tensor, keepdim: bool = False) -> Tensor: - angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) - angle = F.linear(angle, self.project.type_as(angle)) + decode_angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) + decode_angle = F.linear(decode_angle, self.project.type_as(angle)) if keepdim: - angle = angle.reshape(-1, 1) + decode_angle = decode_angle.reshape(*angle.shape[:-1], 1) else: - angle = angle.reshape(-1) - return self.angle_range * angle / self.reg_max - self.angle_offset + decode_angle = decode_angle.reshape(-1) + return self.angle_range * decode_angle / self.reg_max - self.angle_offset From cacfc88ff316849b4c882b2c2cbd107dc59e5d02 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 8 Nov 2022 19:04:43 +0800 Subject: [PATCH 05/52] clean --- .../rotated_rtmdet/_base_/default_runtime.py | 33 +++ configs/rotated_rtmdet/_base_/dota_rr.py | 104 +++++++++ configs/rotated_rtmdet/_base_/schedule_3x.py | 34 +++ .../rotated_rtmdet_l-300e-hrsc.py | 208 ------------------ .../rotated_rtmdet_l-3x-dota.py | 77 +++++++ .../rotated_rtmdet_s-300e-hrsc.py | 70 ------ .../rotated_rtmdet_tiny-300e-hrsc.py | 50 ----- .../rotated_rtmdet_tiny-3x-dota.py | 20 ++ .../models/dense_heads/rotated_rtmdet_head.py | 178 +++++++-------- mmrotate/models/losses/__init__.py | 3 +- mmrotate/models/losses/rd_iou_loss.py | 166 ++++++++++++++ 11 files changed, 527 insertions(+), 416 deletions(-) create mode 100644 configs/rotated_rtmdet/_base_/default_runtime.py create mode 100644 configs/rotated_rtmdet/_base_/dota_rr.py create mode 100644 configs/rotated_rtmdet/_base_/schedule_3x.py delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py create mode 100644 mmrotate/models/losses/rd_iou_loss.py diff --git a/configs/rotated_rtmdet/_base_/default_runtime.py b/configs/rotated_rtmdet/_base_/default_runtime.py new file mode 100644 index 000000000..724ad4014 --- /dev/null +++ b/configs/rotated_rtmdet/_base_/default_runtime.py @@ -0,0 +1,33 @@ +default_scope = 'mmrotate' + +default_hooks = dict( + 
timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=12, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='mmdet.DetVisualizationHook')) + +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='RotLocalVisualizer', vis_backends=vis_backends, name='visualizer') +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) + +log_level = 'INFO' +load_from = None +resume = False + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] diff --git a/configs/rotated_rtmdet/_base_/dota_rr.py b/configs/rotated_rtmdet/_base_/dota_rr.py new file mode 100644 index 000000000..e1c9bf46b --- /dev/null +++ b/configs/rotated_rtmdet/_base_/dota_rr.py @@ -0,0 +1,104 @@ +# dataset settings +dataset_type = 'DOTADataset' +# data_root = '/home/wangchen/liuyanyi/datasets/dota_mmrotate_ss/' +data_root = '/datasets/dota_mmrotate_ss/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='RandomRotate', + prob=0.5, + angle_range=180, + rect_obj_labels=[9, 11]), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + # avoid bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), + img_shape=(1024, 1024), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='val/annfiles/', + data_prefix=dict(img_path='val/images/'), + img_shape=(1024, 1024), + 
test_mode=True, + pipeline=val_pipeline)) +# test_dataloader = val_dataloader + +val_evaluator = dict(type='DOTAMetric', metric='mAP') +# test_evaluator = val_evaluator + +# inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +test_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='test/images/'), + img_shape=(1024, 1024), + test_mode=True, + pipeline=test_pipeline)) +test_evaluator = dict( + type='DOTAMetric', + format_only=True, + merge_patches=True, + outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') diff --git a/configs/rotated_rtmdet/_base_/schedule_3x.py b/configs/rotated_rtmdet/_base_/schedule_3x.py new file mode 100644 index 000000000..30f850b3d --- /dev/null +++ b/configs/rotated_rtmdet/_base_/schedule_3x.py @@ -0,0 +1,34 @@ +max_epochs = 3 * 12 +base_lr = 0.004 / 16 +interval = 12 + +train_cfg = dict( + type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=interval) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py deleted file mode 100644 index e43377a27..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py +++ /dev/null @@ -1,208 +0,0 @@ -_base_ = [ - '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py', - '../_base_/datasets/hrsc.py' -] -model = dict( - type='mmdet.RTMDet', - data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', - mean=[103.53, 116.28, 123.675], - std=[57.375, 57.12, 58.395], - bgr_to_rgb=False, - boxtype2tensor=False, - batch_augments=None), - backbone=dict( - type='mmdet.CSPNeXt', - arch='P5', - expand_ratio=0.5, - deepen_factor=1, - widen_factor=1, - channel_attention=True, - norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), - neck=dict( - type='mmdet.CSPNeXtPAFPN', - in_channels=[256, 512, 1024], - out_channels=256, - num_csp_blocks=3, - expand_ratio=0.5, - norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), - bbox_head=dict( - type='RotatedRTMDetSepBNHead', - # type='RTMDetSepBNHead', - num_classes=1, - in_channels=256, - stacked_convs=2, - feat_channels=256, - anchor_generator=dict( - type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), - bbox_coder=dict(type='DistanceAnglePointCoder', angle_version='le90'), - loss_cls=dict( - type='mmdet.QualityFocalLoss', - use_sigmoid=True, - beta=2.0, - loss_weight=1.0), - loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), - with_objectness=False, - exp_on_reg=True, - share_conv=True, - pred_kernel_size=1, - use_hbbox_loss=False, - scale_angle=False, - # angle_coder=dict( - # _scope_='mmrotate', - # type='CSLCoder', - # angle_version='le90', - # omega=4, - # window='gaussian', - # 
radius=3), - # loss_angle=dict( - # _scope_='mmrotate', - # type='SmoothFocalLoss', - # gamma=2.0, - # alpha=0.25, - # loss_weight=0.8), - # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), - norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), - train_cfg=dict( - assigner=dict( - type='mmdet.DynamicSoftLabelAssigner', - iou_calculator=dict(type='RBboxOverlaps2D'), - topk=13), - allowed_border=-1, - pos_weight=-1, - debug=False), - test_cfg=dict( - nms_pre=2000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms_rotated', iou_threshold=0.1), - max_per_img=2000), -) - -train_pipeline = [ - dict( - type='mmdet.LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.1, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(800, 800), - ratio_range=(1.0, 1.0), - max_cached_images=20, - pad_val=(114, 114, 114)), - dict(type='mmdet.PackDetInputs') -] - -train_pipeline_stage2 = [ - dict( - type='mmdet.LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.1, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict(type='mmdet.PackDetInputs') -] - -test_pipeline = [ - dict( - type='mmdet.LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] - -train_dataloader = dict( - batch_size=8, - num_workers=8, - batch_sampler=None, - pin_memory=True, - dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict( - batch_size=1, num_workers=1, dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader - -max_epochs = 300 -stage2_num_epochs = 20 -base_lr = 0.004 / 4 -interval = 20 - -train_cfg = dict( - max_epochs=max_epochs, - val_interval=interval, - dynamic_intervals=[(max_epochs - stage2_num_epochs, 10)]) - -# optimizer -optim_wrapper = dict( - _delete_=True, - type='OptimWrapper', - optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), - paramwise_cfg=dict( - norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', - start_factor=1.0e-5, - by_epoch=False, - begin=0, - end=1000), - dict( - # use cosine lr from 150 to 300 epoch - type='CosineAnnealingLR', - eta_min=base_lr * 
0.05, - begin=max_epochs // 2, - end=max_epochs, - T_max=max_epochs // 2, - by_epoch=True, - convert_to_iter_based=True), -] - -# hooks -default_hooks = dict( - checkpoint=dict( - interval=interval, - max_keep_ckpts=3 # only keep latest 3 checkpoints - )) -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=max_epochs - stage2_num_epochs, - switch_pipeline=train_pipeline_stage2) -] diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py new file mode 100644 index 000000000..513494c00 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py @@ -0,0 +1,77 @@ +_base_ = [ + './_base_/default_runtime.py', './_base_/schedule_3x.py', + './_base_/dota_rr.py' +] +angle_version = 'le90' +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + # type='RTMDetSepBNHead', + num_classes=15, + in_channels=256, + stacked_convs=2, + feat_channels=256, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + # angle_coder=dict( + # type='DistributionAngleCoder', + # angle_version='le90'), + loss_angle=None, + # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py deleted file mode 100644 index a3b52616d..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py +++ /dev/null @@ -1,70 +0,0 @@ -_base_ = './rotated_rtmdet_l-300e-hrsc.py' -checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa -model = dict( - backbone=dict( - deepen_factor=0.33, - widen_factor=0.5, - init_cfg=dict( - type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), - neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), - bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) - -train_pipeline = [ - dict( - 
type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), - dict( - type='RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(800, 800), - ratio_range=(1.0, 1.0), - max_cached_images=20, - pad_val=(114, 114, 114)), - dict(type='mmdet.PackDetInputs') -] - -train_pipeline_stage2 = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict(type='mmdet.PackDetInputs') -] - -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=280, - switch_pipeline=train_pipeline_stage2) -] \ No newline at end of file diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py deleted file mode 100644 index 1df2d99b3..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py +++ /dev/null @@ -1,50 +0,0 @@ -_base_ = './rotated_rtmdet_s-300e-hrsc.py' - -cocop = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa -checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa - -model = dict( - init_cfg=dict(type='Pretrained', checkpoint=cocop), - backbone=dict( - deepen_factor=0.167, - widen_factor=0.375, - # init_cfg=dict( - # type='Pretrained', prefix='backbone.', checkpoint=checkpoint) - ), - neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), - bbox_head=dict(in_channels=96, feat_channels=96, exp_on_reg=False)) - -train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.CachedMosaic', - img_scale=(800, 800), - pad_val=114.0, - max_cached_images=20, - random_pop=False), - dict( - type='RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(800, 800), - 
ratio_range=(1.0, 1.0), - max_cached_images=10, - random_pop=False, - pad_val=(114, 114, 114), - prob=0.5), - dict(type='mmdet.PackDetInputs') -] - -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py new file mode 100644 index 000000000..77c81f587 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py @@ -0,0 +1,20 @@ +_base_ = './rotated_rtmdet_l-3x-dota.py' + +coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +# checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + # init_cfg=dict( + # type='Pretrained', prefix='backbone.', checkpoint=checkpoint) + ), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict( + in_channels=96, + feat_channels=96, + exp_on_reg=False, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + )) diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py b/mmrotate/models/dense_heads/rotated_rtmdet_head.py index e0f1fbdde..b7b7c1cf9 100644 --- a/mmrotate/models/dense_heads/rotated_rtmdet_head.py +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -1,33 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. import copy -from typing import Tuple, List, Optional +from typing import List, Optional, Tuple import torch -from mmcv.cnn import Scale, ConvModule, is_norm +from mmcv.cnn import ConvModule, Scale, is_norm from mmdet.models import inverse_sigmoid from mmdet.models.dense_heads import RTMDetHead from mmdet.models.task_modules import anchor_inside_flags -from mmdet.models.utils import sigmoid_geometric_mean, multi_apply, unmap, select_single_mlvl, filter_scores_and_topk -from mmdet.structures.bbox import distance2bbox, get_box_tensor, cat_boxes, bbox_cxcywh_to_xyxy -from mmdet.utils import ConfigType, OptConfigType, InstanceList, OptInstanceList, reduce_mean +from mmdet.models.utils import (filter_scores_and_topk, multi_apply, + select_single_mlvl, sigmoid_geometric_mean, + unmap) +from mmdet.structures.bbox import bbox_cxcywh_to_xyxy, cat_boxes, distance2bbox +from mmdet.utils import (ConfigType, InstanceList, OptConfigType, + OptInstanceList, reduce_mean) from mmengine import ConfigDict -from mmengine.model import normal_init, constant_init, bias_init_with_prob +from mmengine.model import bias_init_with_prob, constant_init, normal_init from mmengine.structures import InstanceData -from mmrotate.registry import MODELS, TASK_UTILS +from torch import Tensor, nn from mmrotate.models.task_modules.coders import DistanceAnglePointCoder -from mmrotate.structures import norm_angle, RotatedBoxes -from torch import nn, Tensor +from mmrotate.registry import MODELS, TASK_UTILS +from mmrotate.structures import RotatedBoxes, norm_angle # TODO move to mmrotate.structures.transform and update Coder -def distance2obb(points, - distance, - angle_version='oc'): +def distance2obb(points, distance, angle_version='oc'): distance, angle = distance.split([4, 1], dim=-1) cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = torch.cat([cos_angle, 
-sin_angle, sin_angle, cos_angle], + dim=-1) rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) wh = distance[..., :2] + distance[..., 2:] @@ -57,11 +60,13 @@ class RotatedRTMDetHead(RTMDetHead): def __init__(self, num_classes: int, in_channels: int, + angle_version: str = 'le90', use_hbbox_loss: bool = False, scale_angle: bool = True, angle_coder: ConfigType = dict(type='PseudoAngleCoder'), loss_angle: OptConfigType = None, **kwargs) -> None: + self.angle_version = angle_version self.use_hbbox_loss = use_hbbox_loss self.is_scale_angle = scale_angle self.angle_coder = TASK_UTILS.build(angle_coder) @@ -147,10 +152,10 @@ def forward(self, feats: Tuple[Tensor, ...]) -> tuple: angle_preds.append(angle_pred) return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) - def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: Tensor, - labels: Tensor, label_weights: Tensor, - bbox_targets: Tensor, assign_metrics: Tensor, - stride: List[int]): + def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, + angle_pred: Tensor, labels: Tensor, + label_weights: Tensor, bbox_targets: Tensor, + assign_metrics: Tensor, stride: List[int]): """Compute loss of a single scale level. Args: @@ -201,14 +206,16 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: pos_decode_bbox_pred = pos_bbox_pred pos_decode_bbox_targets = pos_bbox_targets if self.use_hbbox_loss: - pos_decode_bbox_targets = bbox_cxcywh_to_xyxy(pos_bbox_targets[:, :4]) + pos_decode_bbox_targets = bbox_cxcywh_to_xyxy( + pos_bbox_targets[:, :4]) # regression loss pos_bbox_weight = assign_metrics[pos_inds] loss_angle = angle_pred.sum() * 0 if self.loss_angle is not None: - angle_pred = angle_pred.reshape(-1, self.angle_coder.encode_size) + angle_pred = angle_pred.reshape(-1, + self.angle_coder.encode_size) pos_angle_pred = angle_pred[pos_inds] pos_angle_target = pos_bbox_targets[:, 4:5] pos_angle_target = self.angle_coder.encode(pos_angle_target) @@ -229,7 +236,8 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: pos_bbox_weight = bbox_targets.new_tensor(0.) 
loss_angle = angle_pred.sum() * 0 - return loss_cls, loss_bbox, loss_angle, assign_metrics.sum(), pos_bbox_weight.sum(), pos_bbox_weight.sum() + return loss_cls, loss_bbox, loss_angle, assign_metrics.sum( + ), pos_bbox_weight.sum(), pos_bbox_weight.sum() def loss_by_feat(self, cls_scores: List[Tensor], @@ -275,10 +283,12 @@ def loss_by_feat(self, decoded_bboxes = [] decoded_hbboxes = [] angle_preds_list = [] - for anchor, bbox_pred, angle_pred in zip(anchor_list[0], bbox_preds, angle_preds): + for anchor, bbox_pred, angle_pred in zip(anchor_list[0], bbox_preds, + angle_preds): anchor = anchor.reshape(-1, 4) bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) - angle_pred = angle_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.angle_coder.encode_size) + angle_pred = angle_pred.permute(0, 2, 3, 1).reshape( + num_imgs, -1, self.angle_coder.encode_size) if self.use_hbbox_loss: hbbox_pred = distance2bbox(anchor, bbox_pred) @@ -287,15 +297,8 @@ def loss_by_feat(self, decoded_angle = self.angle_coder.decode(angle_pred, keepdim=True) bbox_pred = torch.cat([bbox_pred, decoded_angle], dim=-1) - # res = distance2obb(anchor, bbox_pred, 'le90').reshape(-1, 5) - # - # c = DistanceAnglePointCoder(angle_version='le90') - # res2 = c.decode(anchor.repeat(8, 1)[:, :2], bbox_pred.reshape(-1, 5)) - # - # print((res == res2).unique()) - - # TODO add arg angle_version - bbox_pred = distance2obb(anchor, bbox_pred, angle_version='le90') + bbox_pred = distance2obb( + anchor, bbox_pred, angle_version=self.angle_version) decoded_bboxes.append(bbox_pred) angle_preds_list.append(angle_pred) @@ -316,17 +319,12 @@ def loss_by_feat(self, if self.use_hbbox_loss: decoded_bboxes = decoded_hbboxes - losses_cls, losses_bbox, losses_angle, \ - cls_avg_factors, bbox_avg_factors, angle_avg_factors = multi_apply( - self.loss_by_feat_single, - cls_scores, - decoded_bboxes, - angle_preds_list, - labels_list, - label_weights_list, - bbox_targets_list, - assign_metrics_list, - self.prior_generator.strides) + (losses_cls, losses_bbox, losses_angle, cls_avg_factors, + bbox_avg_factors, angle_avg_factors) = multi_apply( + self.loss_by_feat_single, cls_scores, decoded_bboxes, + angle_preds_list, labels_list, label_weights_list, + bbox_targets_list, assign_metrics_list, + self.prior_generator.strides) cls_avg_factor = reduce_mean(sum(cls_avg_factors)).clamp_(min=1).item() losses_cls = list(map(lambda x: x / cls_avg_factor, losses_cls)) @@ -337,8 +335,12 @@ def loss_by_feat(self, if self.loss_angle is not None: angle_avg_factors = reduce_mean( sum(angle_avg_factors)).clamp_(min=1).item() - losses_angle = list(map(lambda x: x / angle_avg_factors, losses_angle)) - return dict(loss_cls=losses_cls, loss_bbox=losses_bbox, loss_angle=losses_angle) + losses_angle = list( + map(lambda x: x / angle_avg_factors, losses_angle)) + return dict( + loss_cls=losses_cls, + loss_bbox=losses_bbox, + loss_angle=losses_angle) else: return dict(loss_cls=losses_cls, loss_bbox=losses_bbox) @@ -390,7 +392,7 @@ def _get_targets_single(self, img_meta['img_shape'][:2], self.train_cfg['allowed_border']) if not inside_flags.any(): - return (None,) * 7 + return (None, ) * 7 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] @@ -407,7 +409,7 @@ def _get_targets_single(self, num_valid_anchors = anchors.shape[0] bbox_targets = anchors.new_zeros((*anchors.size()[:-1], 5)) - labels = anchors.new_full((num_valid_anchors,), + labels = anchors.new_full((num_valid_anchors, ), self.num_classes, dtype=torch.long) label_weights = 
anchors.new_zeros(num_valid_anchors, dtype=torch.float) @@ -420,7 +422,8 @@ def _get_targets_single(self, # point-based pos_bbox_targets = sampling_result.pos_gt_bboxes # TODO add arg angle_version - pos_bbox_targets = pos_bbox_targets.regularize_boxes('le90') + pos_bbox_targets = pos_bbox_targets.regularize_boxes( + self.angle_version) bbox_targets[pos_inds, :] = pos_bbox_targets labels[pos_inds] = sampling_result.pos_gt_labels @@ -716,7 +719,8 @@ def __init__(self, **kwargs) -> None: self.share_conv = share_conv self.exp_on_reg = exp_on_reg - assert scale_angle == False, 'scale_angle does not support in RotatedRTMDetSepBNHead' + assert scale_angle is False, \ + 'scale_angle does not support in RotatedRTMDetSepBNHead' super().__init__( num_classes, in_channels, @@ -804,7 +808,8 @@ def init_weights(self) -> None: if is_norm(m): constant_init(m, 1) bias_cls = bias_init_with_prob(0.01) - for rtm_cls, rtm_reg, rtm_ang in zip(self.rtm_cls, self.rtm_reg, self.rtm_ang): + for rtm_cls, rtm_reg, rtm_ang in zip(self.rtm_cls, self.rtm_reg, + self.rtm_ang): normal_init(rtm_cls, std=0.01, bias=bias_cls) normal_init(rtm_reg, std=0.01) normal_init(rtm_ang, std=0.01) @@ -863,44 +868,43 @@ def forward(self, feats: Tuple[Tensor, ...]) -> tuple: if __name__ == '__main__': - points = torch.tensor([[0., 0., 8., 8.], - [8., 0., 8., 8.], - [16., 0., 8., 8.], - [24., 0., 8., 8.]], device='cuda:0') - - distances = torch.tensor( - [[[7.4215, 7.8629, 7.4568, 8.1447, -0.0224], - [7.3209, 7.7807, 7.4076, 8.1743, -0.0194], - [7.2929, 7.7480, 7.3624, 8.1829, -0.0228], - [7.3291, 7.7770, 7.3966, 8.2230, -0.0215]], - [[7.5713, 7.8529, 7.5189, 8.1220, -0.0212], - [7.6215, 7.8978, 7.5875, 8.2110, -0.0304], - [7.6464, 7.8849, 7.4770, 8.0470, -0.0372], - [7.5255, 7.8110, 7.4478, 8.1207, -0.0282]], - [[7.4363, 7.8605, 7.4451, 8.1569, -0.0239], - [7.3455, 7.7839, 7.4027, 8.1784, -0.0184], - [7.3022, 7.7584, 7.3663, 8.1751, -0.0232], - [7.3419, 7.7717, 7.3903, 8.2077, -0.0242]], - [[7.4416, 7.8393, 7.4837, 8.1546, -0.0213], - [7.3217, 7.7790, 7.3956, 8.1805, -0.0154], - [7.3080, 7.7377, 7.3658, 8.1779, -0.0205], - [7.3390, 7.7580, 7.4031, 8.2245, -0.0205]], - [[7.7741, 7.8088, 7.7546, 8.3303, -0.0151], - [7.7897, 7.9653, 7.9556, 8.3727, -0.0266], - [8.2531, 8.2622, 8.4759, 8.1064, -0.0506], - [8.1185, 7.9733, 8.1983, 8.3481, -0.0243]], - [[7.4850, 7.8193, 7.4702, 8.1592, -0.0248], - [7.3636, 7.7511, 7.4078, 8.1570, -0.0204], - [7.3184, 7.7122, 7.3942, 8.1611, -0.0243], - [7.3617, 7.7788, 7.4001, 8.2358, -0.0256]], - [[7.4395, 7.8696, 7.4530, 8.1517, -0.0246], - [7.3550, 7.7790, 7.4124, 8.1524, -0.0221], - [7.3377, 7.7614, 7.3752, 8.1712, -0.0226], - [7.3411, 7.7603, 7.4000, 8.2265, -0.0246]], - [[7.4344, 7.8780, 7.4388, 8.1534, -0.0247], - [7.3309, 7.8050, 7.3925, 8.1647, -0.0202], - [7.2904, 7.7461, 7.3651, 8.1827, -0.0221], - [7.3486, 7.7741, 7.4090, 8.2144, -0.0230]]], device='cuda:0') + points = torch.tensor([[0., 0., 8., 8.], [8., 0., 8., 8.], + [16., 0., 8., 8.], [24., 0., 8., 8.]], + device='cuda:0') + + distances = torch.tensor([[[7.4215, 7.8629, 7.4568, 8.1447, -0.0224], + [7.3209, 7.7807, 7.4076, 8.1743, -0.0194], + [7.2929, 7.7480, 7.3624, 8.1829, -0.0228], + [7.3291, 7.7770, 7.3966, 8.2230, -0.0215]], + [[7.5713, 7.8529, 7.5189, 8.1220, -0.0212], + [7.6215, 7.8978, 7.5875, 8.2110, -0.0304], + [7.6464, 7.8849, 7.4770, 8.0470, -0.0372], + [7.5255, 7.8110, 7.4478, 8.1207, -0.0282]], + [[7.4363, 7.8605, 7.4451, 8.1569, -0.0239], + [7.3455, 7.7839, 7.4027, 8.1784, -0.0184], + [7.3022, 7.7584, 7.3663, 8.1751, -0.0232], + 
[7.3419, 7.7717, 7.3903, 8.2077, -0.0242]], + [[7.4416, 7.8393, 7.4837, 8.1546, -0.0213], + [7.3217, 7.7790, 7.3956, 8.1805, -0.0154], + [7.3080, 7.7377, 7.3658, 8.1779, -0.0205], + [7.3390, 7.7580, 7.4031, 8.2245, -0.0205]], + [[7.7741, 7.8088, 7.7546, 8.3303, -0.0151], + [7.7897, 7.9653, 7.9556, 8.3727, -0.0266], + [8.2531, 8.2622, 8.4759, 8.1064, -0.0506], + [8.1185, 7.9733, 8.1983, 8.3481, -0.0243]], + [[7.4850, 7.8193, 7.4702, 8.1592, -0.0248], + [7.3636, 7.7511, 7.4078, 8.1570, -0.0204], + [7.3184, 7.7122, 7.3942, 8.1611, -0.0243], + [7.3617, 7.7788, 7.4001, 8.2358, -0.0256]], + [[7.4395, 7.8696, 7.4530, 8.1517, -0.0246], + [7.3550, 7.7790, 7.4124, 8.1524, -0.0221], + [7.3377, 7.7614, 7.3752, 8.1712, -0.0226], + [7.3411, 7.7603, 7.4000, 8.2265, -0.0246]], + [[7.4344, 7.8780, 7.4388, 8.1534, -0.0247], + [7.3309, 7.8050, 7.3925, 8.1647, -0.0202], + [7.2904, 7.7461, 7.3651, 8.1827, -0.0221], + [7.3486, 7.7741, 7.4090, 8.2144, -0.0230]]], + device='cuda:0') res = distance2obb(points, distances, 'le90').reshape(-1, 5) diff --git a/mmrotate/models/losses/__init__.py b/mmrotate/models/losses/__init__.py index 216543420..644a4db37 100644 --- a/mmrotate/models/losses/__init__.py +++ b/mmrotate/models/losses/__init__.py @@ -3,11 +3,12 @@ from .gaussian_dist_loss import GDLoss from .gaussian_dist_loss_v1 import GDLoss_v1 from .kf_iou_loss import KFLoss +from .rd_iou_loss import RDIoULoss from .rotated_iou_loss import RotatedIoULoss from .smooth_focal_loss import SmoothFocalLoss from .spatial_border_loss import SpatialBorderLoss __all__ = [ 'GDLoss', 'GDLoss_v1', 'KFLoss', 'ConvexGIoULoss', 'BCConvexGIoULoss', - 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss' + 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss', 'RDIoULoss' ] diff --git a/mmrotate/models/losses/rd_iou_loss.py b/mmrotate/models/losses/rd_iou_loss.py new file mode 100644 index 000000000..6d971aa00 --- /dev/null +++ b/mmrotate/models/losses/rd_iou_loss.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import numpy as np +import torch +import torch.nn as nn +from mmdet.models.losses.utils import weighted_loss + +from mmrotate.registry import MODELS +from mmrotate.structures import norm_angle + + +@weighted_loss +def rd_iou_loss(pred, target, linear=False, mode='log', eps=1e-6): + """Rotated IoU loss. + + Computing the IoU loss between a set of predicted rbboxes and target + rbboxes. + The loss is calculated as negative log of IoU. + + Args: + pred (torch.Tensor): Predicted bboxes of format (x, y, h, w, angle), + shape (n, 5). + target (torch.Tensor): Corresponding gt bboxes, shape (n, 5). + linear (bool, optional): If True, use linear scale of loss instead of + log scale. Default: False. + mode (str): Loss scaling mode, including "linear", "square", and "log". + Default: 'log' + eps (float): Eps to avoid log(0). + Return: + torch.Tensor: Loss tensor. 
+ """ + assert mode in ['linear', 'square', 'log'] + if linear: + mode = 'linear' + warnings.warn( + 'DeprecationWarning: Setting "linear=True" in ' + 'poly_iou_loss is deprecated, please use "mode=`linear`" ' + 'instead.') + + pred_x, pred_y, pred_w, pred_h, pred_t = pred.split([1, 1, 1, 1, 1], + dim=-1) + target_x, target_y, target_w, target_h, target_t = target.split( + [1, 1, 1, 1, 1], dim=-1) + + target_z = torch.zeros_like(target_t) + target_l = torch.ones_like(target_t) * 0.5 * np.pi + + pred_z = torch.ones_like(pred_t) * norm_angle(pred_t - target_t, 'le90') + pred_l = torch.ones_like(pred_t) * 0.5 * np.pi + + area_pred = pred_w * pred_h * pred_l + area_target = target_w * target_h * target_l + + union = ( + f(pred_x, target_x, pred_w, target_w) * + f(pred_y, target_y, pred_h, target_h) * + f(pred_z, target_z, pred_l, target_l)) + + ious = union / (area_pred + area_target - union) + + enclose_area = ( + f2(pred_x, target_x, pred_w, target_w) * + f2(pred_y, target_y, pred_h, target_h) * + f2(pred_z, target_z, pred_l, target_l)) + + gious = ious - (enclose_area - union) / enclose_area + + # ious = ious.squeeze(0).clamp(min=eps) + + loss = 1 - gious.squeeze(-1) + + return loss + + +def f(x1, x2, w1, w2): + ff = torch.min(x1 + 0.5 * w1, x2 + 0.5 * w2) - torch.max( + x1 - 0.5 * w1, x2 - 0.5 * w2) + return ff.clamp(min=0) + + +def f2(x1, x2, w1, w2): + ff = torch.max(x1 + 0.5 * w1, x2 + 0.5 * w2) - torch.min( + x1 - 0.5 * w1, x2 - 0.5 * w2) + return ff.clamp(min=0) + + +@MODELS.register_module() +class RDIoULoss(nn.Module): + """RotatedIoULoss. + + Computing the IoU loss between a set of predicted rbboxes and + target rbboxes. + Args: + linear (bool): If True, use linear scale of loss else determined + by mode. Default: False. + eps (float): Eps to avoid log(0). + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Weight of loss. + mode (str): Loss scaling mode, including "linear", "square", and "log". + Default: 'log' + """ + + def __init__(self, + linear=False, + eps=1e-6, + reduction='mean', + loss_weight=1.0, + mode='log'): + super(RDIoULoss, self).__init__() + assert mode in ['linear', 'square', 'log'] + if linear: + mode = 'linear' + warnings.warn('DeprecationWarning: Setting "linear=True" in ' + 'IOULoss is deprecated, please use "mode=`linear`" ' + 'instead.') + self.mode = mode + self.linear = linear + self.eps = eps + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. Options are "none", "mean" and "sum". 
+ """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if (weight is not None) and (not torch.any(weight > 0)) and ( + reduction != 'none'): + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 + if weight is not None and weight.dim() > 1: + # TODO: remove this in the future + # reduce the weight of shape (n, 5) to (n,) to match the + # iou_loss of shape (n,) + assert weight.shape == pred.shape + weight = weight.mean(-1) + loss = self.loss_weight * rd_iou_loss( + pred, + target, + weight, + mode=self.mode, + eps=self.eps, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss From 51026c6d847ea6ae0e18ee345983f3c3e975cbd2 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 8 Nov 2022 19:16:08 +0800 Subject: [PATCH 06/52] clean --- mmrotate/models/dense_heads/__init__.py | 2 +- mmrotate/models/task_modules/coders/angle_coder.py | 4 +++- .../models/task_modules/coders/distance_angle_point_coder.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mmrotate/models/dense_heads/__init__.py b/mmrotate/models/dense_heads/__init__.py index 311aa2c83..4fcf22a32 100644 --- a/mmrotate/models/dense_heads/__init__.py +++ b/mmrotate/models/dense_heads/__init__.py @@ -8,9 +8,9 @@ from .rotated_fcos_head import RotatedFCOSHead from .rotated_reppoints_head import RotatedRepPointsHead from .rotated_retina_head import RotatedRetinaHead +from .rotated_rtmdet_head import RotatedRTMDetHead, RotatedRTMDetSepBNHead from .s2a_head import S2AHead, S2ARefineHead from .sam_reppoints_head import SAMRepPointsHead -from .rotated_rtmdet_head import RotatedRTMDetHead, RotatedRTMDetSepBNHead __all__ = [ 'RotatedRetinaHead', 'OrientedRPNHead', 'RotatedRepPointsHead', diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 8db58fa6a..a236e50d2 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -178,4 +178,6 @@ def decode(self, angle: Tensor, keepdim: bool = False) -> Tensor: decode_angle = decode_angle.reshape(*angle.shape[:-1], 1) else: decode_angle = decode_angle.reshape(-1) - return self.angle_range * decode_angle / self.reg_max - self.angle_offset + decode_angle = self.angle_offset * decode_angle / self.reg_max + decode_angle = decode_angle - self.angle_offset + return decode_angle diff --git a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py index 0b7d3f9ec..b92647088 100644 --- a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py +++ b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py @@ -99,7 +99,8 @@ def distance2obb(self, cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], + dim=-1) rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) wh = distance[..., :2] + distance[..., 2:] From 84acb352ee6bcafc6c3bcb72964139e826af4187 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Wed, 9 Nov 2022 15:43:26 +0800 Subject: [PATCH 07/52] add probiou --- mmrotate/models/losses/__init__.py | 4 +- mmrotate/models/losses/prob_iou_loss.py | 136 ++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 
mmrotate/models/losses/prob_iou_loss.py diff --git a/mmrotate/models/losses/__init__.py b/mmrotate/models/losses/__init__.py index 644a4db37..a49cdd0ff 100644 --- a/mmrotate/models/losses/__init__.py +++ b/mmrotate/models/losses/__init__.py @@ -3,6 +3,7 @@ from .gaussian_dist_loss import GDLoss from .gaussian_dist_loss_v1 import GDLoss_v1 from .kf_iou_loss import KFLoss +from .prob_iou_loss import ProbIoULoss from .rd_iou_loss import RDIoULoss from .rotated_iou_loss import RotatedIoULoss from .smooth_focal_loss import SmoothFocalLoss @@ -10,5 +11,6 @@ __all__ = [ 'GDLoss', 'GDLoss_v1', 'KFLoss', 'ConvexGIoULoss', 'BCConvexGIoULoss', - 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss', 'RDIoULoss' + 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss', 'RDIoULoss', + 'ProbIoULoss' ] diff --git a/mmrotate/models/losses/prob_iou_loss.py b/mmrotate/models/losses/prob_iou_loss.py new file mode 100644 index 000000000..d8acc0eef --- /dev/null +++ b/mmrotate/models/losses/prob_iou_loss.py @@ -0,0 +1,136 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmdet.models import weighted_loss +from torch import nn + +from mmrotate.registry import MODELS + + +def gbb_form(boxes): + return torch.cat( + (boxes[:, :2], torch.pow(boxes[:, 2:4], 2) / 12, boxes[:, 4:]), 1) + + +def rotated_form(a_, b_, angles): + a = a_ * torch.pow(torch.cos(angles), 2.) + b_ * torch.pow( + torch.sin(angles), 2.) + b = a_ * torch.pow(torch.sin(angles), 2.) + b_ * torch.pow( + torch.cos(angles), 2.) + c = a_ * torch.cos(angles) * torch.sin(angles) - b_ * torch.sin( + angles) * torch.cos(angles) + return a, b, c + + +@weighted_loss +def probiou_loss(pred, target, eps=1e-3, mode='l1'): + """pred -> a matrix [N,5](x,y,w,h,angle) containing ours predicted box + target -> a matrix [N,5](x,y,w,h,angle) containing ours target box eps. + + -> threshold to avoid infinite values mode -> ('l1' in [0,1] or 'l2' in + [0,inf]) metrics according our paper. + """ + + gbboxes1 = gbb_form(pred) + gbboxes2 = gbb_form(target) + + (x1, y1, a1_, b1_, c1_) = (gbboxes1[:, 0], gbboxes1[:, 1], gbboxes1[:, 2], + gbboxes1[:, 3], gbboxes1[:, 4]) + (x2, y2, a2_, b2_, c2_) = (gbboxes2[:, 0], gbboxes2[:, 1], gbboxes2[:, 2], + gbboxes2[:, 3], gbboxes2[:, 4]) + + a1, b1, c1 = rotated_form(a1_, b1_, c1_) + a2, b2, c2 = rotated_form(a2_, b2_, c2_) + + t1 = (((a1 + a2) * (torch.pow(y1 - y2, 2)) + (b1 + b2) * + (torch.pow(x1 - x2, 2))) / ((a1 + a2) * (b1 + b2) - + (torch.pow(c1 + c2, 2)) + eps)) * 0.25 + t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / + ((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2)) + eps)) * 0.5 + t3 = torch.log(((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2))) / + (4 * torch.sqrt((a1 * b1 - torch.pow(c1, 2)) * + (a2 * b2 - torch.pow(c2, 2))) + eps) + + eps) * 0.5 + + B_d = t1 + t2 + t3 + + B_d = torch.clamp(B_d, eps, 100.0) + l1 = torch.sqrt(1.0 - torch.exp(-B_d) + eps) + l_i = torch.pow(l1, 2.0) + l2 = -torch.log(1.0 - l_i + eps) + + if mode == 'l1': + probiou = l1 + if mode == 'l2': + probiou = l2 + + return probiou + + +@MODELS.register_module() +class ProbIoULoss(nn.Module): + """RotatedIoULoss. + + Computing the IoU loss between a set of predicted rbboxes and + target rbboxes. + Args: + linear (bool): If True, use linear scale of loss else determined + by mode. Default: False. + eps (float): Eps to avoid log(0). + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Weight of loss. + mode (str): Loss scaling mode, including "linear", "square", and "log". 
+ Default: 'log' + """ + + def __init__(self, mode='l1', eps=1e-6, reduction='mean', loss_weight=1.0): + super(ProbIoULoss, self).__init__() + + self.mode = mode + self.eps = eps + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. Options are "none", "mean" and "sum". + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if (weight is not None) and (not torch.any(weight > 0)) and ( + reduction != 'none'): + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 + if weight is not None and weight.dim() > 1: + # TODO: remove this in the future + # reduce the weight of shape (n, 5) to (n,) to match the + # iou_loss of shape (n,) + assert weight.shape == pred.shape + weight = weight.mean(-1) + loss = self.loss_weight * probiou_loss( + pred, + target, + weight, + mode=self.mode, + eps=self.eps, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss From c5f4d20b83426bf22a6a84b23d0579203336f10c Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Wed, 9 Nov 2022 15:44:25 +0800 Subject: [PATCH 08/52] add hrsc cfg --- configs/rotated_rtmdet/_base_/hrsc_rr.py | 76 ++++++++++++++++++ .../rotated_rtmdet_l-3x-hrsc.py | 77 +++++++++++++++++++ .../rotated_rtmdet_m-3x-hrsc.py | 7 ++ .../rotated_rtmdet_s-3x-hrsc.py | 12 +++ .../rotated_rtmdet_tiny-3x-hrsc.py | 19 +++++ .../rotated_rtmdet_x-3x-hrsc.py | 8 ++ 6 files changed, 199 insertions(+) create mode 100644 configs/rotated_rtmdet/_base_/hrsc_rr.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py diff --git a/configs/rotated_rtmdet/_base_/hrsc_rr.py b/configs/rotated_rtmdet/_base_/hrsc_rr.py new file mode 100644 index 000000000..d2c848aa2 --- /dev/null +++ b/configs/rotated_rtmdet/_base_/hrsc_rr.py @@ -0,0 +1,76 @@ +# dataset settings +dataset_type = 'HRSCDataset' +data_root = '/home/wangchen/liuyanyi/datasets/hrsc/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + 
dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + # avoid bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/trainval.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/test.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + test_mode=True, + pipeline=val_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='DOTAMetric', + iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], + metric='mAP') +test_evaluator = val_evaluator diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py new file mode 100644 index 000000000..01faa26f1 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py @@ -0,0 +1,77 @@ +_base_ = [ + './_base_/default_runtime.py', './_base_/schedule_3x.py', + './_base_/hrsc_rr.py' +] +angle_version = 'le90' +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + # type='RTMDetSepBNHead', + num_classes=15, + in_channels=256, + stacked_convs=2, + feat_channels=256, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + # angle_coder=dict( + # type='DistributionAngleCoder', + # angle_version='le90'), + loss_angle=None, 
+ # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py new file mode 100644 index 000000000..1225f5119 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py @@ -0,0 +1,7 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +model = dict( + backbone=dict(deepen_factor=0.67, widen_factor=0.75), + neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2), + bbox_head=dict(in_channels=192, feat_channels=192)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py new file mode 100644 index 000000000..8e9604a23 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py @@ -0,0 +1,12 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa +model = dict( + backbone=dict( + deepen_factor=0.33, + widen_factor=0.5, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), + bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py new file mode 100644 index 000000000..5238867a8 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py @@ -0,0 +1,19 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + # init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict( + in_channels=96, + feat_channels=96, + exp_on_reg=False, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + )) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py new file mode 100644 index 000000000..a3539c096 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py @@ -0,0 +1,8 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +model = dict( + 
backbone=dict(deepen_factor=1.33, widen_factor=1.25), + neck=dict( + in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4), + bbox_head=dict(in_channels=320, feat_channels=320)) From e9dfe35a7588cd386802030195c7cf07eae08c77 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 10 Nov 2022 22:12:17 +0800 Subject: [PATCH 09/52] fix GDLoss with empty input --- mmrotate/models/losses/gaussian_dist_loss.py | 4 +++- mmrotate/models/losses/gaussian_dist_loss_v1.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mmrotate/models/losses/gaussian_dist_loss.py b/mmrotate/models/losses/gaussian_dist_loss.py index 782441212..3e64b1724 100644 --- a/mmrotate/models/losses/gaussian_dist_loss.py +++ b/mmrotate/models/losses/gaussian_dist_loss.py @@ -386,7 +386,9 @@ def forward(self, reduction_override if reduction_override else self.reduction) if (weight is not None) and (not torch.any(weight > 0)) and ( reduction != 'none'): - return (pred * weight).sum() + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 if weight is not None and weight.dim() > 1: assert weight.shape == pred.shape weight = weight.mean(-1) diff --git a/mmrotate/models/losses/gaussian_dist_loss_v1.py b/mmrotate/models/losses/gaussian_dist_loss_v1.py index 1685ae89c..09f6f4a6a 100644 --- a/mmrotate/models/losses/gaussian_dist_loss_v1.py +++ b/mmrotate/models/losses/gaussian_dist_loss_v1.py @@ -213,7 +213,9 @@ def forward(self, reduction_override if reduction_override else self.reduction) if (weight is not None) and (not torch.any(weight > 0)) and ( reduction != 'none'): - return (pred * weight).sum() + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 if weight is not None and weight.dim() > 1: assert weight.shape == pred.shape weight = weight.mean(-1) From 762f8538b21d9ddf9ee5575062f2cd0c372b6cbe Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 10 Nov 2022 22:13:01 +0800 Subject: [PATCH 10/52] add probiou cfg --- .../rotated_rtmdet_tiny_probiou-3x-hrsc.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py new file mode 100644 index 000000000..2b4bf41ab --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py @@ -0,0 +1,19 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + # init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict( + in_channels=96, + feat_channels=96, + exp_on_reg=False, + loss_bbox=dict(type='ProbIoULoss', mode='l1', loss_weight=2.0), + )) From f9aa704a57fb77f0689957534725393ffa696568 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Tue, 15 Nov 2022 14:17:00 +0800 Subject: [PATCH 11/52] add mixup mosaic cfg --- .../rotated_rtmdet/_base_/default_runtime.py | 11 +- .../_base_/dota_mixup_mosaic.py | 158 
++++++++++++++++++ configs/rotated_rtmdet/_base_/dota_rr.py | 9 + .../_base_/hrsc_mosaic_mixup.py | 132 +++++++++++++++ configs/rotated_rtmdet/_base_/hrsc_rr.py | 9 + .../rotated_rtmdet/_base_/schedule_100e.py | 34 ++++ .../models/dense_heads/rotated_rtmdet_head.py | 10 +- 7 files changed, 350 insertions(+), 13 deletions(-) create mode 100644 configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py create mode 100644 configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py create mode 100644 configs/rotated_rtmdet/_base_/schedule_100e.py diff --git a/configs/rotated_rtmdet/_base_/default_runtime.py b/configs/rotated_rtmdet/_base_/default_runtime.py index 724ad4014..0a91907ef 100644 --- a/configs/rotated_rtmdet/_base_/default_runtime.py +++ b/configs/rotated_rtmdet/_base_/default_runtime.py @@ -4,7 +4,7 @@ timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=12, max_keep_ckpts=3), + checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='mmdet.DetVisualizationHook')) @@ -22,12 +22,3 @@ log_level = 'INFO' load_from = None resume = False - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49) -] diff --git a/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py b/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py new file mode 100644 index 000000000..998a45f57 --- /dev/null +++ b/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py @@ -0,0 +1,158 @@ +# dataset settings +dataset_type = 'DOTADataset' +# data_root = '/home/wangchen/liuyanyi/datasets/dota_mmrotate_ss/' +data_root = '/datasets/dota_mmrotate_ss/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.CachedMosaic', + img_scale=(1024, 1024), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(2048, 2048), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(1024, 1024), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1024, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + 
type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + # avoid bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), + img_shape=(1024, 1024), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='val/annfiles/', + data_prefix=dict(img_path='val/images/'), + img_shape=(1024, 1024), + test_mode=True, + pipeline=val_pipeline)) +# test_dataloader = val_dataloader + +val_evaluator = dict(type='DOTAMetric', metric='mAP') +# test_evaluator = val_evaluator + +# inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+test_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='test/images/'), + img_shape=(1024, 1024), + test_mode=True, + pipeline=test_pipeline)) +test_evaluator = dict( + type='DOTAMetric', + format_only=True, + merge_patches=True, + outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=30, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/_base_/dota_rr.py b/configs/rotated_rtmdet/_base_/dota_rr.py index e1c9bf46b..96bd7ac6d 100644 --- a/configs/rotated_rtmdet/_base_/dota_rr.py +++ b/configs/rotated_rtmdet/_base_/dota_rr.py @@ -102,3 +102,12 @@ format_only=True, merge_patches=True, outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] diff --git a/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py b/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py new file mode 100644 index 000000000..519c9c1dd --- /dev/null +++ b/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py @@ -0,0 +1,132 @@ +# dataset settings +dataset_type = 'HRSCDataset' +data_root = '/home/wangchen/liuyanyi/datasets/hrsc/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.CachedMosaic', + img_scale=(800, 800), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(800, 800), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + # avoid 
bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/trainval.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/test.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + test_mode=True, + pipeline=val_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='DOTAMetric', + iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], + metric='mAP') +test_evaluator = val_evaluator + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=90, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/_base_/hrsc_rr.py b/configs/rotated_rtmdet/_base_/hrsc_rr.py index d2c848aa2..51e0b7b73 100644 --- a/configs/rotated_rtmdet/_base_/hrsc_rr.py +++ b/configs/rotated_rtmdet/_base_/hrsc_rr.py @@ -74,3 +74,12 @@ iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], metric='mAP') test_evaluator = val_evaluator + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] diff --git a/configs/rotated_rtmdet/_base_/schedule_100e.py b/configs/rotated_rtmdet/_base_/schedule_100e.py new file mode 100644 index 000000000..c0ca01a0d --- /dev/null +++ b/configs/rotated_rtmdet/_base_/schedule_100e.py @@ -0,0 +1,34 @@ +max_epochs = 100 +base_lr = 0.004 / 16 +interval = 20 + +train_cfg = dict( + type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=interval) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py 
b/mmrotate/models/dense_heads/rotated_rtmdet_head.py index b7b7c1cf9..f89cfb920 100644 --- a/mmrotate/models/dense_heads/rotated_rtmdet_head.py +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -219,10 +219,14 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, pos_angle_pred = angle_pred[pos_inds] pos_angle_target = pos_bbox_targets[:, 4:5] pos_angle_target = self.angle_coder.encode(pos_angle_target) + if pos_angle_target.dim() == 2: + pos_angle_weight = pos_bbox_weight.unsqueeze(-1) + else: + pos_angle_weight = pos_bbox_weight loss_angle = self.loss_angle( pos_angle_pred, pos_angle_target, - weight=pos_bbox_weight, + weight=pos_angle_weight, avg_factor=1.0) loss_bbox = self.loss_bbox( @@ -236,8 +240,8 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, pos_bbox_weight = bbox_targets.new_tensor(0.) loss_angle = angle_pred.sum() * 0 - return loss_cls, loss_bbox, loss_angle, assign_metrics.sum( - ), pos_bbox_weight.sum(), pos_bbox_weight.sum() + return (loss_cls, loss_bbox, loss_angle, assign_metrics.sum(), + pos_bbox_weight.sum(), pos_bbox_weight.sum()) def loss_by_feat(self, cls_scores: List[Tensor], From 81d50df27dd332a80295ab3935c5ac274fe88260 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 13 Oct 2022 12:40:31 +0800 Subject: [PATCH 12/52] Add DistributionAngleCoder --- ..._fcos_distribution_r50_fpn_6x_hrsc_le90.py | 68 +++++++++++++++++++ .../models/task_modules/coders/__init__.py | 5 +- .../models/task_modules/coders/angle_coder.py | 30 ++++++++ 3 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py diff --git a/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py b/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py new file mode 100644 index 000000000..e7a9d8f35 --- /dev/null +++ b/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py @@ -0,0 +1,68 @@ +_base_ = [ + '../_base_/datasets/hrsc.py', '../_base_/schedules/schedule_6x.py', + '../_base_/default_runtime.py' +] +angle_version = 'le90' + +# model settings +model = dict( + type='mmdet.FCOS', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32, + boxtype2tensor=False), + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='mmdet.FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='RotatedFCOSHead', + num_classes=1, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + center_sampling=True, + center_sample_radius=1.5, + norm_on_bbox=True, + centerness_on_reg=True, + use_hbbox_loss=False, + scale_angle=True, + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', loss_weight=1.0), + angle_coder=dict(type='DistributionAngleCoder', reg_max=16), + loss_angle=dict(type='mmdet.DistributionFocalLoss', loss_weight=0.20), + loss_centerness=dict( + 
type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000)) diff --git a/mmrotate/models/task_modules/coders/__init__.py b/mmrotate/models/task_modules/coders/__init__.py index 0f903e7b1..ec9f54117 100644 --- a/mmrotate/models/task_modules/coders/__init__.py +++ b/mmrotate/models/task_modules/coders/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .angle_coder import CSLCoder, PseudoAngleCoder +from .angle_coder import CSLCoder, DistributionAngleCoder, PseudoAngleCoder from .delta_midpointoffset_rbbox_coder import MidpointOffsetCoder from .delta_xywh_hbbox_coder import DeltaXYWHHBBoxCoder from .delta_xywh_qbbox_coder import DeltaXYWHQBBoxCoder @@ -11,5 +11,6 @@ __all__ = [ 'DeltaXYWHTRBBoxCoder', 'DeltaXYWHTHBBoxCoder', 'MidpointOffsetCoder', 'GVFixCoder', 'GVRatioCoder', 'CSLCoder', 'DistanceAnglePointCoder', - 'DeltaXYWHHBBoxCoder', 'DeltaXYWHQBBoxCoder', 'PseudoAngleCoder' + 'DeltaXYWHHBBoxCoder', 'DeltaXYWHQBBoxCoder', 'PseudoAngleCoder', + 'DistributionAngleCoder' ] diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 31070180a..9f6ecbdb8 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -1,7 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. import math +import numpy as np import torch +import torch.nn.functional as F from mmdet.models.task_modules.coders.base_bbox_coder import BaseBBoxCoder from torch import Tensor @@ -139,3 +141,31 @@ def decode(self, angle_preds: Tensor, keepdim: bool = False) -> Tensor: return angle_preds else: return angle_preds.squeeze(-1) + + +@TASK_UTILS.register_module() +class DistributionAngleCoder(BaseBBoxCoder): + + def __init__(self, angle_version='le90', reg_max=16): + super().__init__() + self.angle_range = 0.5 * np.pi if angle_version == 'oc' else np.pi + self.angle_offset_dict = { + 'oc': 0, + 'le90': 0.5 * np.pi, + 'le135': 0.25 * np.pi + } + self.angle_offset = self.angle_offset_dict[angle_version] + self.reg_max = reg_max + self.encode_size = reg_max + 1 + self.project = torch.linspace(0, self.reg_max, self.reg_max + 1) + + def encode(self, angle): + # Norm to (0~1)*reg_max + dfl_target = self.reg_max * (self.angle_offset + + angle) / self.angle_range + return dfl_target.flatten() + + def decode(self, angle, keepdim=True): + angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) + angle = F.linear(angle, self.project.type_as(angle)).reshape(-1, 1) + return self.angle_range * angle / self.reg_max - self.angle_offset From 371ed4ba22813d6989e51671ce12065af5f07350 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Wed, 19 Oct 2022 19:38:20 +0800 Subject: [PATCH 13/52] fix keepdim --- .../models/task_modules/coders/angle_coder.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 9f6ecbdb8..7941cd423 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -145,8 +145,14 @@ def decode(self, angle_preds: Tensor, keepdim: bool = False) -> Tensor: @TASK_UTILS.register_module() class DistributionAngleCoder(BaseBBoxCoder): + """Distribution representation for angle. 
- def __init__(self, angle_version='le90', reg_max=16): + Args: + angle_version (str): Angle definition. + reg_max (int): Max value of integral. Defaults to 16. + """ + + def __init__(self, angle_version: str = 'le90', reg_max: int = 16): super().__init__() self.angle_range = 0.5 * np.pi if angle_version == 'oc' else np.pi self.angle_offset_dict = { @@ -159,13 +165,17 @@ def __init__(self, angle_version='le90', reg_max=16): self.encode_size = reg_max + 1 self.project = torch.linspace(0, self.reg_max, self.reg_max + 1) - def encode(self, angle): + def encode(self, angle: Tensor) -> Tensor: # Norm to (0~1)*reg_max dfl_target = self.reg_max * (self.angle_offset + angle) / self.angle_range return dfl_target.flatten() - def decode(self, angle, keepdim=True): + def decode(self, angle: Tensor, keepdim: bool = False) -> Tensor: angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) - angle = F.linear(angle, self.project.type_as(angle)).reshape(-1, 1) + angle = F.linear(angle, self.project.type_as(angle)) + if keepdim: + angle = angle.reshape(-1, 1) + else: + angle = angle.reshape(-1) return self.angle_range * angle / self.reg_max - self.angle_offset From bdd9388b5a1fc0bd651520f7760b705e00dd222e Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 1 Nov 2022 10:47:49 +0800 Subject: [PATCH 14/52] rtm init --- .../rotated_rtmdet_l-300e-hrsc.py | 208 ++++ .../rotated_rtmdet_s-300e-hrsc.py | 70 ++ .../rotated_rtmdet_tiny-300e-hrsc.py | 50 + mmrotate/models/dense_heads/__init__.py | 4 +- .../models/dense_heads/rotated_rtmdet_head.py | 910 ++++++++++++++++++ .../coders/distance_angle_point_coder.py | 16 +- 6 files changed, 1249 insertions(+), 9 deletions(-) create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py create mode 100644 mmrotate/models/dense_heads/rotated_rtmdet_head.py diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py new file mode 100644 index 000000000..e43377a27 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py @@ -0,0 +1,208 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py', + '../_base_/datasets/hrsc.py' +] +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + # type='RTMDetSepBNHead', + num_classes=1, + in_channels=256, + stacked_convs=2, + feat_channels=256, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict(type='DistanceAnglePointCoder', angle_version='le90'), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + 
use_hbbox_loss=False, + scale_angle=False, + # angle_coder=dict( + # _scope_='mmrotate', + # type='CSLCoder', + # angle_version='le90', + # omega=4, + # window='gaussian', + # radius=3), + # loss_angle=dict( + # _scope_='mmrotate', + # type='SmoothFocalLoss', + # gamma=2.0, + # alpha=0.25, + # loss_weight=0.8), + # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) + +train_pipeline = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +test_pipeline = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=8, + num_workers=8, + batch_sampler=None, + pin_memory=True, + dataset=dict(pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, num_workers=1, dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader + +max_epochs = 300 +stage2_num_epochs = 20 +base_lr = 0.004 / 4 +interval = 20 + +train_cfg = dict( + max_epochs=max_epochs, + val_interval=interval, + dynamic_intervals=[(max_epochs - stage2_num_epochs, 10)]) + +# optimizer +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + 
type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + interval=interval, + max_keep_ckpts=3 # only keep latest 3 checkpoints + )) +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py new file mode 100644 index 000000000..a3b52616d --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py @@ -0,0 +1,70 @@ +_base_ = './rotated_rtmdet_l-300e-hrsc.py' +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa +model = dict( + backbone=dict( + deepen_factor=0.33, + widen_factor=0.5, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), + bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), + dict( + type='RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=280, + switch_pipeline=train_pipeline_stage2) +] \ No newline at end of file diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py new file mode 100644 index 000000000..1df2d99b3 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py @@ -0,0 +1,50 @@ +_base_ = 
'./rotated_rtmdet_s-300e-hrsc.py' + +cocop = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + init_cfg=dict(type='Pretrained', checkpoint=cocop), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + # init_cfg=dict( + # type='Pretrained', prefix='backbone.', checkpoint=checkpoint) + ), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict(in_channels=96, feat_channels=96, exp_on_reg=False)) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.CachedMosaic', + img_scale=(800, 800), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/mmrotate/models/dense_heads/__init__.py b/mmrotate/models/dense_heads/__init__.py index d2ac36c40..311aa2c83 100644 --- a/mmrotate/models/dense_heads/__init__.py +++ b/mmrotate/models/dense_heads/__init__.py @@ -10,10 +10,12 @@ from .rotated_retina_head import RotatedRetinaHead from .s2a_head import S2AHead, S2ARefineHead from .sam_reppoints_head import SAMRepPointsHead +from .rotated_rtmdet_head import RotatedRTMDetHead, RotatedRTMDetSepBNHead __all__ = [ 'RotatedRetinaHead', 'OrientedRPNHead', 'RotatedRepPointsHead', 'SAMRepPointsHead', 'AngleBranchRetinaHead', 'RotatedATSSHead', 'RotatedFCOSHead', 'OrientedRepPointsHead', 'R3Head', 'R3RefineHead', - 'S2AHead', 'S2ARefineHead', 'CFAHead' + 'S2AHead', 'S2ARefineHead', 'CFAHead', 'RotatedRTMDetHead', + 'RotatedRTMDetSepBNHead' ] diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py b/mmrotate/models/dense_heads/rotated_rtmdet_head.py new file mode 100644 index 000000000..e0f1fbdde --- /dev/null +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -0,0 +1,910 @@ +import copy +from typing import Tuple, List, Optional + +import torch +from mmcv.cnn import Scale, ConvModule, is_norm +from mmdet.models import inverse_sigmoid +from mmdet.models.dense_heads import RTMDetHead +from mmdet.models.task_modules import anchor_inside_flags +from mmdet.models.utils import sigmoid_geometric_mean, multi_apply, unmap, select_single_mlvl, filter_scores_and_topk +from mmdet.structures.bbox import distance2bbox, get_box_tensor, cat_boxes, bbox_cxcywh_to_xyxy +from mmdet.utils import ConfigType, OptConfigType, InstanceList, OptInstanceList, reduce_mean +from mmengine import ConfigDict +from mmengine.model import normal_init, constant_init, bias_init_with_prob +from mmengine.structures import InstanceData +from mmrotate.registry import MODELS, TASK_UTILS + +from 
mmrotate.models.task_modules.coders import DistanceAnglePointCoder +from mmrotate.structures import norm_angle, RotatedBoxes +from torch import nn, Tensor + + +# TODO move to mmrotate.structures.transform and update Coder +def distance2obb(points, + distance, + angle_version='oc'): + distance, angle = distance.split([4, 1], dim=-1) + + cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) + + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) + + wh = distance[..., :2] + distance[..., 2:] + offset_t = (distance[..., 2:] - distance[..., :2]) / 2 + offset_t = offset_t.unsqueeze(-1) + offset = torch.matmul(rot_matrix, offset_t).squeeze(-1) + ctr = points[..., :2] + offset + + angle_regular = norm_angle(angle, angle_version) + return torch.cat([ctr, wh, angle_regular], dim=-1) + + +@MODELS.register_module() +class RotatedRTMDetHead(RTMDetHead): + """Detection Head of Rotated RTMDet. + + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + with_objectness (bool): Whether to add an objectness branch. + Defaults to True. + act_cfg (:obj:`ConfigDict` or dict): Config dict for activation layer. + Default: dict(type='ReLU') + """ + + def __init__(self, + num_classes: int, + in_channels: int, + use_hbbox_loss: bool = False, + scale_angle: bool = True, + angle_coder: ConfigType = dict(type='PseudoAngleCoder'), + loss_angle: OptConfigType = None, + **kwargs) -> None: + self.use_hbbox_loss = use_hbbox_loss + self.is_scale_angle = scale_angle + self.angle_coder = TASK_UTILS.build(angle_coder) + super().__init__( + num_classes, + in_channels, + # useless, but error + loss_centerness=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0), + **kwargs) + if loss_angle is not None: + self.loss_angle = MODELS.build(loss_angle) + else: + self.loss_angle = None + + def _init_layers(self): + """Initialize layers of the head.""" + super()._init_layers() + pred_pad_size = self.pred_kernel_size // 2 + self.rtm_ang = nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.angle_coder.encode_size, + self.pred_kernel_size, + padding=pred_pad_size) + if self.is_scale_angle: + self.scale_angle = Scale(1.0) + + def init_weights(self) -> None: + """Initialize weights of the head.""" + super().init_weights() + normal_init(self.rtm_ang, std=0.01) + + def forward(self, feats: Tuple[Tensor, ...]) -> tuple: + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: Usually a tuple of classification scores and bbox prediction + - cls_scores (list[Tensor]): Classification scores for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * num_classes. + - bbox_preds (list[Tensor]): Box energies / deltas for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * 4. + - angle_preds (list[Tensor]): Angle prediction for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * angle_dim. 
+ """ + + cls_scores = [] + bbox_preds = [] + angle_preds = [] + for idx, (x, scale, stride) in enumerate( + zip(feats, self.scales, self.prior_generator.strides)): + cls_feat = x + reg_feat = x + + for cls_layer in self.cls_convs: + cls_feat = cls_layer(cls_feat) + cls_score = self.rtm_cls(cls_feat) + + for reg_layer in self.reg_convs: + reg_feat = reg_layer(reg_feat) + + if self.with_objectness: + objectness = self.rtm_obj(reg_feat) + cls_score = inverse_sigmoid( + sigmoid_geometric_mean(cls_score, objectness)) + + reg_dist = scale(self.rtm_reg(reg_feat).exp()).float() * stride[0] + if self.is_scale_angle: + angle_pred = self.scale_angle(self.rtm_ang(reg_feat)).float() + else: + angle_pred = self.rtm_ang(reg_feat).float() + + cls_scores.append(cls_score) + bbox_preds.append(reg_dist) + angle_preds.append(angle_pred) + return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) + + def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: Tensor, + labels: Tensor, label_weights: Tensor, + bbox_targets: Tensor, assign_metrics: Tensor, + stride: List[int]): + """Compute loss of a single scale level. + + Args: + cls_score (Tensor): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W). + bbox_pred (Tensor): Decoded bboxes for each scale + level with shape (N, num_anchors * 4, H, W). + labels (Tensor): Labels of each anchors with shape + (N, num_total_anchors). + label_weights (Tensor): Label weights of each anchor with shape + (N, num_total_anchors). + bbox_targets (Tensor): BBox regression targets of each anchor with + shape (N, num_total_anchors, 4). + assign_metrics (Tensor): Assign metrics with shape + (N, num_total_anchors). + stride (List[int]): Downsample stride of the feature map. + + Returns: + dict[str, Tensor]: A dictionary of loss components. + """ + assert stride[0] == stride[1], 'h stride is not equal to w stride!' 
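# Shape sketch (illustrative, not lines from the patch). For one feature level with
# batch size N and H*W priors, the tensors handled below are:
#   cls_score        (N, num_base_priors * num_classes, H, W), flattened to
#                    (N*H*W, num_classes)
#   bbox_pred        already decoded in loss_by_feat: rotated boxes (N, H*W, 5)
#                    as (cx, cy, w, h, t), or horizontal boxes (N, H*W, 4) when
#                    use_hbbox_loss=True
#   angle_pred       raw angle logits (N, H*W, angle_coder.encode_size)
#   labels, label_weights, assign_metrics
#                    (N, H*W); the QualityFocalLoss consumes the soft target
#                    tuple (labels, assign_metrics)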
+ cls_score = cls_score.permute(0, 2, 3, 1).reshape( + -1, self.cls_out_channels).contiguous() + + if self.use_hbbox_loss: + bbox_pred = bbox_pred.reshape(-1, 4) + else: + bbox_pred = bbox_pred.reshape(-1, 5) + bbox_targets = bbox_targets.reshape(-1, 5) + + labels = labels.reshape(-1) + assign_metrics = assign_metrics.reshape(-1) + label_weights = label_weights.reshape(-1) + targets = (labels, assign_metrics) + + loss_cls = self.loss_cls( + cls_score, targets, label_weights, avg_factor=1.0) + + # FG cat_id: [0, num_classes -1], BG cat_id: num_classes + bg_class_ind = self.num_classes + pos_inds = ((labels >= 0) + & (labels < bg_class_ind)).nonzero().squeeze(1) + + if len(pos_inds) > 0: + pos_bbox_targets = bbox_targets[pos_inds] + pos_bbox_pred = bbox_pred[pos_inds] + + pos_decode_bbox_pred = pos_bbox_pred + pos_decode_bbox_targets = pos_bbox_targets + if self.use_hbbox_loss: + pos_decode_bbox_targets = bbox_cxcywh_to_xyxy(pos_bbox_targets[:, :4]) + + # regression loss + pos_bbox_weight = assign_metrics[pos_inds] + + loss_angle = angle_pred.sum() * 0 + if self.loss_angle is not None: + angle_pred = angle_pred.reshape(-1, self.angle_coder.encode_size) + pos_angle_pred = angle_pred[pos_inds] + pos_angle_target = pos_bbox_targets[:, 4:5] + pos_angle_target = self.angle_coder.encode(pos_angle_target) + loss_angle = self.loss_angle( + pos_angle_pred, + pos_angle_target, + weight=pos_bbox_weight, + avg_factor=1.0) + + loss_bbox = self.loss_bbox( + pos_decode_bbox_pred, + pos_decode_bbox_targets, + weight=pos_bbox_weight, + avg_factor=1.0) + + else: + loss_bbox = bbox_pred.sum() * 0 + pos_bbox_weight = bbox_targets.new_tensor(0.) + loss_angle = angle_pred.sum() * 0 + + return loss_cls, loss_bbox, loss_angle, assign_metrics.sum(), pos_bbox_weight.sum(), pos_bbox_weight.sum() + + def loss_by_feat(self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + angle_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + batch_gt_instances_ignore: OptInstanceList = None): + """Compute losses of the head. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W) + bbox_preds (list[Tensor]): Decoded box for each scale + level with shape (N, num_anchors * 4, H, W) in + [tl_x, tl_y, br_x, br_y] format. + batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``bboxes`` and ``labels`` + attributes. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional): + Batch of gt_instances_ignore. It includes ``bboxes`` attribute + data that is ignored during training and testing. + Defaults to None. + + Returns: + dict[str, Tensor]: A dictionary of loss components. 
+ """ + num_imgs = len(batch_img_metas) + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + assert len(featmap_sizes) == self.prior_generator.num_levels + + device = cls_scores[0].device + anchor_list, valid_flag_list = self.get_anchors( + featmap_sizes, batch_img_metas, device=device) + flatten_cls_scores = torch.cat([ + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.cls_out_channels) + for cls_score in cls_scores + ], 1) + + decoded_bboxes = [] + decoded_hbboxes = [] + angle_preds_list = [] + for anchor, bbox_pred, angle_pred in zip(anchor_list[0], bbox_preds, angle_preds): + anchor = anchor.reshape(-1, 4) + bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + angle_pred = angle_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.angle_coder.encode_size) + + if self.use_hbbox_loss: + hbbox_pred = distance2bbox(anchor, bbox_pred) + decoded_hbboxes.append(hbbox_pred) + + decoded_angle = self.angle_coder.decode(angle_pred, keepdim=True) + bbox_pred = torch.cat([bbox_pred, decoded_angle], dim=-1) + + # res = distance2obb(anchor, bbox_pred, 'le90').reshape(-1, 5) + # + # c = DistanceAnglePointCoder(angle_version='le90') + # res2 = c.decode(anchor.repeat(8, 1)[:, :2], bbox_pred.reshape(-1, 5)) + # + # print((res == res2).unique()) + + # TODO add arg angle_version + bbox_pred = distance2obb(anchor, bbox_pred, angle_version='le90') + decoded_bboxes.append(bbox_pred) + angle_preds_list.append(angle_pred) + + # flatten_bboxes is rbox, for target assign + flatten_bboxes = torch.cat(decoded_bboxes, 1) + + cls_reg_targets = self.get_targets( + flatten_cls_scores, + flatten_bboxes, + anchor_list, + valid_flag_list, + batch_gt_instances, + batch_img_metas, + batch_gt_instances_ignore=batch_gt_instances_ignore) + (anchor_list, labels_list, label_weights_list, bbox_targets_list, + assign_metrics_list) = cls_reg_targets + + if self.use_hbbox_loss: + decoded_bboxes = decoded_hbboxes + + losses_cls, losses_bbox, losses_angle, \ + cls_avg_factors, bbox_avg_factors, angle_avg_factors = multi_apply( + self.loss_by_feat_single, + cls_scores, + decoded_bboxes, + angle_preds_list, + labels_list, + label_weights_list, + bbox_targets_list, + assign_metrics_list, + self.prior_generator.strides) + + cls_avg_factor = reduce_mean(sum(cls_avg_factors)).clamp_(min=1).item() + losses_cls = list(map(lambda x: x / cls_avg_factor, losses_cls)) + + bbox_avg_factor = reduce_mean( + sum(bbox_avg_factors)).clamp_(min=1).item() + losses_bbox = list(map(lambda x: x / bbox_avg_factor, losses_bbox)) + if self.loss_angle is not None: + angle_avg_factors = reduce_mean( + sum(angle_avg_factors)).clamp_(min=1).item() + losses_angle = list(map(lambda x: x / angle_avg_factors, losses_angle)) + return dict(loss_cls=losses_cls, loss_bbox=losses_bbox, loss_angle=losses_angle) + else: + return dict(loss_cls=losses_cls, loss_bbox=losses_bbox) + + def _get_targets_single(self, + cls_scores: Tensor, + bbox_preds: Tensor, + flat_anchors: Tensor, + valid_flags: Tensor, + gt_instances: InstanceData, + img_meta: dict, + gt_instances_ignore: Optional[InstanceData] = None, + unmap_outputs=True): + """Compute regression, classification targets for anchors in a single + image. + + Args: + cls_scores (list(Tensor)): Box scores for each image. + bbox_preds (list(Tensor)): Box energies / deltas for each image. 
+ flat_anchors (Tensor): Multi-level anchors of the image, which are + concatenated into a single tensor of shape (num_anchors ,4) + valid_flags (Tensor): Multi level valid flags of the image, + which are concatenated into a single tensor of + shape (num_anchors,). + gt_instances (:obj:`InstanceData`): Ground truth of instance + annotations. It usually includes ``bboxes`` and ``labels`` + attributes. + img_meta (dict): Meta information for current image. + gt_instances_ignore (:obj:`InstanceData`, optional): Instances + to be ignored during training. It includes ``bboxes`` attribute + data that is ignored during training and testing. + Defaults to None. + unmap_outputs (bool): Whether to map outputs back to the original + set of anchors. Defaults to True. + + Returns: + tuple: N is the number of total anchors in the image. + + - anchors (Tensor): All anchors in the image with shape (N, 4). + - labels (Tensor): Labels of all anchors in the image with shape + (N,). + - label_weights (Tensor): Label weights of all anchor in the + image with shape (N,). + - bbox_targets (Tensor): BBox targets of all anchors in the + image with shape (N, 4). + - norm_alignment_metrics (Tensor): Normalized alignment metrics + of all priors in the image with shape (N,). + """ + inside_flags = anchor_inside_flags(flat_anchors, valid_flags, + img_meta['img_shape'][:2], + self.train_cfg['allowed_border']) + if not inside_flags.any(): + return (None,) * 7 + # assign gt and sample anchors + anchors = flat_anchors[inside_flags, :] + + pred_instances = InstanceData( + scores=cls_scores[inside_flags, :], + bboxes=bbox_preds[inside_flags, :], + priors=anchors) + + assign_result = self.assigner.assign(pred_instances, gt_instances, + gt_instances_ignore) + + sampling_result = self.sampler.sample(assign_result, pred_instances, + gt_instances) + + num_valid_anchors = anchors.shape[0] + bbox_targets = anchors.new_zeros((*anchors.size()[:-1], 5)) + labels = anchors.new_full((num_valid_anchors,), + self.num_classes, + dtype=torch.long) + label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) + assign_metrics = anchors.new_zeros( + num_valid_anchors, dtype=torch.float) + + pos_inds = sampling_result.pos_inds + neg_inds = sampling_result.neg_inds + if len(pos_inds) > 0: + # point-based + pos_bbox_targets = sampling_result.pos_gt_bboxes + # TODO add arg angle_version + pos_bbox_targets = pos_bbox_targets.regularize_boxes('le90') + bbox_targets[pos_inds, :] = pos_bbox_targets + + labels[pos_inds] = sampling_result.pos_gt_labels + if self.train_cfg['pos_weight'] <= 0: + label_weights[pos_inds] = 1.0 + else: + label_weights[pos_inds] = self.train_cfg['pos_weight'] + if len(neg_inds) > 0: + label_weights[neg_inds] = 1.0 + + class_assigned_gt_inds = torch.unique( + sampling_result.pos_assigned_gt_inds) + for gt_inds in class_assigned_gt_inds: + gt_class_inds = pos_inds[sampling_result.pos_assigned_gt_inds == + gt_inds] + assign_metrics[gt_class_inds] = assign_result.max_overlaps[ + gt_class_inds] + + # map up to original set of anchors + if unmap_outputs: + num_total_anchors = flat_anchors.size(0) + anchors = unmap(anchors, num_total_anchors, inside_flags) + labels = unmap( + labels, num_total_anchors, inside_flags, fill=self.num_classes) + label_weights = unmap(label_weights, num_total_anchors, + inside_flags) + bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) + assign_metrics = unmap(assign_metrics, num_total_anchors, + inside_flags) + return (anchors, labels, label_weights, bbox_targets, 
assign_metrics) + + def predict_by_feat(self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + angle_preds: List[Tensor], + score_factors: Optional[List[Tensor]] = None, + batch_img_metas: Optional[List[dict]] = None, + cfg: Optional[ConfigDict] = None, + rescale: bool = False, + with_nms: bool = True) -> InstanceList: + """Transform a batch of output features extracted from the head into + bbox results. + Note: When score_factors is not None, the cls_scores are + usually multiplied by it then obtain the real score used in NMS, + such as CenterNess in FCOS, IoU branch in ATSS. + Args: + cls_scores (list[Tensor]): Classification scores for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * num_classes, H, W). + bbox_preds (list[Tensor]): Box energies / deltas for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * 4, H, W). + angle_preds (list[Tensor]): Box angle for each scale level + with shape (N, num_points * encode_size, H, W) + score_factors (list[Tensor], optional): Score factor for + all scale level, each is a 4D-tensor, has shape + (batch_size, num_priors * 1, H, W). Defaults to None. + batch_img_metas (list[dict], Optional): Batch image meta info. + Defaults to None. + cfg (ConfigDict, optional): Test / postprocessing + configuration, if None, test_cfg would be used. + Defaults to None. + rescale (bool): If True, return boxes in original image space. + Defaults to False. + with_nms (bool): If True, do nms before return boxes. + Defaults to True. + Returns: + list[:obj:`InstanceData`]: Object detection results of each image + after the post process. Each item usually contains following keys. + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes (Tensor): Has a shape (num_instances, 5), + the last dimension 5 arrange as (x, y, w, h, t). + """ + assert len(cls_scores) == len(bbox_preds) + + if score_factors is None: + # e.g. Retina, FreeAnchor, Foveabox, etc. + with_score_factors = False + else: + # e.g. FCOS, PAA, ATSS, AutoAssign, etc. 
+ with_score_factors = True + assert len(cls_scores) == len(score_factors) + + num_levels = len(cls_scores) + + featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)] + mlvl_priors = self.prior_generator.grid_priors( + featmap_sizes, + dtype=cls_scores[0].dtype, + device=cls_scores[0].device) + + result_list = [] + + for img_id in range(len(batch_img_metas)): + img_meta = batch_img_metas[img_id] + cls_score_list = select_single_mlvl( + cls_scores, img_id, detach=True) + bbox_pred_list = select_single_mlvl( + bbox_preds, img_id, detach=True) + angle_pred_list = select_single_mlvl( + angle_preds, img_id, detach=True) + if with_score_factors: + score_factor_list = select_single_mlvl( + score_factors, img_id, detach=True) + else: + score_factor_list = [None for _ in range(num_levels)] + + results = self._predict_by_feat_single( + cls_score_list=cls_score_list, + bbox_pred_list=bbox_pred_list, + angle_pred_list=angle_pred_list, + score_factor_list=score_factor_list, + mlvl_priors=mlvl_priors, + img_meta=img_meta, + cfg=cfg, + rescale=rescale, + with_nms=with_nms) + result_list.append(results) + return result_list + + def _predict_by_feat_single(self, + cls_score_list: List[Tensor], + bbox_pred_list: List[Tensor], + angle_pred_list: List[Tensor], + score_factor_list: List[Tensor], + mlvl_priors: List[Tensor], + img_meta: dict, + cfg: ConfigDict, + rescale: bool = False, + with_nms: bool = True) -> InstanceData: + """Transform a single image's features extracted from the head into + bbox results. + Args: + cls_score_list (list[Tensor]): Box scores from all scale + levels of a single image, each item has shape + (num_priors * num_classes, H, W). + bbox_pred_list (list[Tensor]): Box energies / deltas from + all scale levels of a single image, each item has shape + (num_priors * 4, H, W). + angle_pred_list (list[Tensor]): Box angle for a single scale + level with shape (N, num_points * encode_size, H, W). + score_factor_list (list[Tensor]): Score factor from all scale + levels of a single image, each item has shape + (num_priors * 1, H, W). + mlvl_priors (list[Tensor]): Each element in the list is + the priors of a single level in feature pyramid. In all + anchor-based methods, it has shape (num_priors, 4). In + all anchor-free methods, it has shape (num_priors, 2) + when `with_stride=True`, otherwise it still has shape + (num_priors, 4). + img_meta (dict): Image meta info. + cfg (mmengine.Config): Test / postprocessing configuration, + if None, test_cfg would be used. + rescale (bool): If True, return boxes in original image space. + Defaults to False. + with_nms (bool): If True, do nms before return boxes. + Defaults to True. + Returns: + :obj:`InstanceData`: Detection results of each image + after the post process. + Each item usually contains following keys. + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes (Tensor): Has a shape (num_instances, 5), + the last dimension 5 arrange as (x, y, w, h, t). + """ + if score_factor_list[0] is None: + # e.g. Retina, FreeAnchor, etc. + with_score_factors = False + else: + # e.g. FCOS, PAA, ATSS, etc. 
+ with_score_factors = True + + cfg = self.test_cfg if cfg is None else cfg + cfg = copy.deepcopy(cfg) + img_shape = img_meta['img_shape'] + nms_pre = cfg.get('nms_pre', -1) + + mlvl_bbox_preds = [] + mlvl_valid_priors = [] + mlvl_scores = [] + mlvl_labels = [] + if with_score_factors: + mlvl_score_factors = [] + else: + mlvl_score_factors = None + for level_idx, ( + cls_score, bbox_pred, angle_pred, score_factor, priors) in \ + enumerate(zip(cls_score_list, bbox_pred_list, angle_pred_list, + score_factor_list, mlvl_priors)): + + assert cls_score.size()[-2:] == bbox_pred.size()[-2:] + + # dim = self.bbox_coder.encode_size + bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4) + angle_pred = angle_pred.permute(1, 2, 0).reshape( + -1, self.angle_coder.encode_size) + if with_score_factors: + score_factor = score_factor.permute(1, 2, + 0).reshape(-1).sigmoid() + cls_score = cls_score.permute(1, 2, + 0).reshape(-1, self.cls_out_channels) + if self.use_sigmoid_cls: + scores = cls_score.sigmoid() + else: + # remind that we set FG labels to [0, num_class-1] + # since mmdet v2.0 + # BG cat_id: num_class + scores = cls_score.softmax(-1)[:, :-1] + + # After https://github.com/open-mmlab/mmdetection/pull/6268/, + # this operation keeps fewer bboxes under the same `nms_pre`. + # There is no difference in performance for most models. If you + # find a slight drop in performance, you can set a larger + # `nms_pre` than before. + score_thr = cfg.get('score_thr', 0) + + results = filter_scores_and_topk( + scores, score_thr, nms_pre, + dict( + bbox_pred=bbox_pred, angle_pred=angle_pred, priors=priors)) + scores, labels, keep_idxs, filtered_results = results + + bbox_pred = filtered_results['bbox_pred'] + angle_pred = filtered_results['angle_pred'] + priors = filtered_results['priors'] + + decoded_angle = self.angle_coder.decode(angle_pred, keepdim=True) + bbox_pred = torch.cat([bbox_pred, decoded_angle], dim=-1) + + if with_score_factors: + score_factor = score_factor[keep_idxs] + + mlvl_bbox_preds.append(bbox_pred) + mlvl_valid_priors.append(priors) + mlvl_scores.append(scores) + mlvl_labels.append(labels) + + if with_score_factors: + mlvl_score_factors.append(score_factor) + + bbox_pred = torch.cat(mlvl_bbox_preds) + priors = cat_boxes(mlvl_valid_priors) + bboxes = self.bbox_coder.decode(priors, bbox_pred, max_shape=img_shape) + + results = InstanceData() + results.bboxes = RotatedBoxes(bboxes) + results.scores = torch.cat(mlvl_scores) + results.labels = torch.cat(mlvl_labels) + if with_score_factors: + results.score_factors = torch.cat(mlvl_score_factors) + + return self._bbox_post_process( + results=results, + cfg=cfg, + rescale=rescale, + with_nms=with_nms, + img_meta=img_meta) + + +@MODELS.register_module() +class RotatedRTMDetSepBNHead(RotatedRTMDetHead): + """Rotated RTMDetHead with separated BN layers and shared conv layers. + + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + share_conv (bool): Whether to share conv layers between stages. + Defaults to True. + norm_cfg (:obj:`ConfigDict` or dict)): Config dict for normalization + layer. Defaults to dict(type='BN', momentum=0.03, eps=0.001). + act_cfg (:obj:`ConfigDict` or dict)): Config dict for activation layer. + Defaults to dict(type='SiLU'). + pred_kernel_size (int): Kernel size of prediction layer. Defaults to 1. 
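+ exp_on_reg (bool): Whether to apply exponential to the regression output before it is multiplied by the stride. Defaults to False.
+ scale_angle (bool): Not supported by this head; must be kept False. Defaults to False.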
+ """ + + def __init__(self, + num_classes: int, + in_channels: int, + share_conv: bool = True, + scale_angle: bool = False, + norm_cfg: ConfigType = dict( + type='BN', momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type='SiLU'), + pred_kernel_size: int = 1, + exp_on_reg=False, + **kwargs) -> None: + self.share_conv = share_conv + self.exp_on_reg = exp_on_reg + assert scale_angle == False, 'scale_angle does not support in RotatedRTMDetSepBNHead' + super().__init__( + num_classes, + in_channels, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + pred_kernel_size=pred_kernel_size, + scale_angle=False, + **kwargs) + + def _init_layers(self) -> None: + """Initialize layers of the head.""" + self.cls_convs = nn.ModuleList() + self.reg_convs = nn.ModuleList() + + self.rtm_cls = nn.ModuleList() + self.rtm_reg = nn.ModuleList() + self.rtm_ang = nn.ModuleList() + if self.with_objectness: + self.rtm_obj = nn.ModuleList() + for n in range(len(self.prior_generator.strides)): + cls_convs = nn.ModuleList() + reg_convs = nn.ModuleList() + for i in range(self.stacked_convs): + chn = self.in_channels if i == 0 else self.feat_channels + cls_convs.append( + ConvModule( + chn, + self.feat_channels, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + reg_convs.append( + ConvModule( + chn, + self.feat_channels, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.cls_convs.append(cls_convs) + self.reg_convs.append(reg_convs) + + self.rtm_cls.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.cls_out_channels, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + self.rtm_reg.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * 4, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + self.rtm_ang.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.angle_coder.encode_size, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + if self.with_objectness: + self.rtm_obj.append( + nn.Conv2d( + self.feat_channels, + 1, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + + if self.share_conv: + for n in range(len(self.prior_generator.strides)): + for i in range(self.stacked_convs): + self.cls_convs[n][i].conv = self.cls_convs[0][i].conv + self.reg_convs[n][i].conv = self.reg_convs[0][i].conv + + def init_weights(self) -> None: + """Initialize weights of the head.""" + for m in self.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, mean=0, std=0.01) + if is_norm(m): + constant_init(m, 1) + bias_cls = bias_init_with_prob(0.01) + for rtm_cls, rtm_reg, rtm_ang in zip(self.rtm_cls, self.rtm_reg, self.rtm_ang): + normal_init(rtm_cls, std=0.01, bias=bias_cls) + normal_init(rtm_reg, std=0.01) + normal_init(rtm_ang, std=0.01) + if self.with_objectness: + for rtm_obj in self.rtm_obj: + normal_init(rtm_obj, std=0.01, bias=bias_cls) + + def forward(self, feats: Tuple[Tensor, ...]) -> tuple: + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: Usually a tuple of classification scores and bbox prediction + + - cls_scores (tuple[Tensor]): Classification scores for all scale + levels, each is a 4D-tensor, the channels number is + num_anchors * num_classes. + - bbox_preds (tuple[Tensor]): Box energies / deltas for all scale + levels, each is a 4D-tensor, the channels number is + num_anchors * 4. 
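+ - angle_preds (tuple[Tensor]): Angle predictions for all scale levels, each is a 4D-tensor, the channels number is num_anchors * angle_coder.encode_size.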
+ """ + + cls_scores = [] + bbox_preds = [] + angle_preds = [] + for idx, (x, stride) in enumerate( + zip(feats, self.prior_generator.strides)): + cls_feat = x + reg_feat = x + + for cls_layer in self.cls_convs[idx]: + cls_feat = cls_layer(cls_feat) + cls_score = self.rtm_cls[idx](cls_feat) + + for reg_layer in self.reg_convs[idx]: + reg_feat = reg_layer(reg_feat) + + if self.with_objectness: + objectness = self.rtm_obj[idx](reg_feat) + cls_score = inverse_sigmoid( + sigmoid_geometric_mean(cls_score, objectness)) + if self.exp_on_reg: + reg_dist = self.rtm_reg[idx](reg_feat).exp() * stride[0] + else: + reg_dist = self.rtm_reg[idx](reg_feat) * stride[0] + + angle_pred = self.rtm_ang[idx](reg_feat) + + cls_scores.append(cls_score) + bbox_preds.append(reg_dist) + angle_preds.append(angle_pred) + return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) + + +if __name__ == '__main__': + points = torch.tensor([[0., 0., 8., 8.], + [8., 0., 8., 8.], + [16., 0., 8., 8.], + [24., 0., 8., 8.]], device='cuda:0') + + distances = torch.tensor( + [[[7.4215, 7.8629, 7.4568, 8.1447, -0.0224], + [7.3209, 7.7807, 7.4076, 8.1743, -0.0194], + [7.2929, 7.7480, 7.3624, 8.1829, -0.0228], + [7.3291, 7.7770, 7.3966, 8.2230, -0.0215]], + [[7.5713, 7.8529, 7.5189, 8.1220, -0.0212], + [7.6215, 7.8978, 7.5875, 8.2110, -0.0304], + [7.6464, 7.8849, 7.4770, 8.0470, -0.0372], + [7.5255, 7.8110, 7.4478, 8.1207, -0.0282]], + [[7.4363, 7.8605, 7.4451, 8.1569, -0.0239], + [7.3455, 7.7839, 7.4027, 8.1784, -0.0184], + [7.3022, 7.7584, 7.3663, 8.1751, -0.0232], + [7.3419, 7.7717, 7.3903, 8.2077, -0.0242]], + [[7.4416, 7.8393, 7.4837, 8.1546, -0.0213], + [7.3217, 7.7790, 7.3956, 8.1805, -0.0154], + [7.3080, 7.7377, 7.3658, 8.1779, -0.0205], + [7.3390, 7.7580, 7.4031, 8.2245, -0.0205]], + [[7.7741, 7.8088, 7.7546, 8.3303, -0.0151], + [7.7897, 7.9653, 7.9556, 8.3727, -0.0266], + [8.2531, 8.2622, 8.4759, 8.1064, -0.0506], + [8.1185, 7.9733, 8.1983, 8.3481, -0.0243]], + [[7.4850, 7.8193, 7.4702, 8.1592, -0.0248], + [7.3636, 7.7511, 7.4078, 8.1570, -0.0204], + [7.3184, 7.7122, 7.3942, 8.1611, -0.0243], + [7.3617, 7.7788, 7.4001, 8.2358, -0.0256]], + [[7.4395, 7.8696, 7.4530, 8.1517, -0.0246], + [7.3550, 7.7790, 7.4124, 8.1524, -0.0221], + [7.3377, 7.7614, 7.3752, 8.1712, -0.0226], + [7.3411, 7.7603, 7.4000, 8.2265, -0.0246]], + [[7.4344, 7.8780, 7.4388, 8.1534, -0.0247], + [7.3309, 7.8050, 7.3925, 8.1647, -0.0202], + [7.2904, 7.7461, 7.3651, 8.1827, -0.0221], + [7.3486, 7.7741, 7.4090, 8.2144, -0.0230]]], device='cuda:0') + + res = distance2obb(points, distances, 'le90').reshape(-1, 5) + + c = DistanceAnglePointCoder(angle_version='le90') + res2 = c.decode(points.repeat(8, 1)[:, :2], distances.reshape(-1, 5)) + + print((res == res2).unique()) diff --git a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py index d456d7202..0b7d3f9ec 100644 --- a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py +++ b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py @@ -95,17 +95,17 @@ def distance2obb(self, distance, max_shape=None, angle_version='oc'): - distance, angle = distance.split([4, 1], dim=1) + distance, angle = distance.split([4, 1], dim=-1) cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], - dim=1).reshape(-1, 2, 2) - wh = distance[:, :2] + distance[:, 2:] - offset_t = (distance[:, 2:] - distance[:, :2]) / 2 - offset_t = offset_t.unsqueeze(2) 
- offset = torch.bmm(rot_matrix, offset_t).squeeze(2) - ctr = points + offset + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) + + wh = distance[..., :2] + distance[..., 2:] + offset_t = (distance[..., 2:] - distance[..., :2]) / 2 + offset = torch.matmul(rot_matrix, offset_t[..., None]).squeeze(-1) + ctr = points[..., :2] + offset angle_regular = norm_angle(angle, angle_version) return torch.cat([ctr, wh, angle_regular], dim=-1) From 6cc5e98b4a6dc6d5cb2259bcb6124ec90279c7de Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 8 Nov 2022 18:07:30 +0800 Subject: [PATCH 15/52] fix distribution angle coder --- mmrotate/models/task_modules/coders/angle_coder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 7941cd423..8db58fa6a 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -172,10 +172,10 @@ def encode(self, angle: Tensor) -> Tensor: return dfl_target.flatten() def decode(self, angle: Tensor, keepdim: bool = False) -> Tensor: - angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) - angle = F.linear(angle, self.project.type_as(angle)) + decode_angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) + decode_angle = F.linear(decode_angle, self.project.type_as(angle)) if keepdim: - angle = angle.reshape(-1, 1) + decode_angle = decode_angle.reshape(*angle.shape[:-1], 1) else: - angle = angle.reshape(-1) - return self.angle_range * angle / self.reg_max - self.angle_offset + decode_angle = decode_angle.reshape(-1) + return self.angle_range * decode_angle / self.reg_max - self.angle_offset From 3ffdf3c7769cd8dd5a9b2597160ae0148d950e5b Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 8 Nov 2022 19:04:43 +0800 Subject: [PATCH 16/52] clean --- .../rotated_rtmdet/_base_/default_runtime.py | 33 +++ configs/rotated_rtmdet/_base_/dota_rr.py | 104 +++++++++ configs/rotated_rtmdet/_base_/schedule_3x.py | 34 +++ .../rotated_rtmdet_l-300e-hrsc.py | 208 ------------------ .../rotated_rtmdet_l-3x-dota.py | 77 +++++++ .../rotated_rtmdet_s-300e-hrsc.py | 70 ------ .../rotated_rtmdet_tiny-300e-hrsc.py | 50 ----- .../rotated_rtmdet_tiny-3x-dota.py | 20 ++ .../models/dense_heads/rotated_rtmdet_head.py | 178 +++++++-------- mmrotate/models/losses/__init__.py | 3 +- mmrotate/models/losses/rd_iou_loss.py | 166 ++++++++++++++ 11 files changed, 527 insertions(+), 416 deletions(-) create mode 100644 configs/rotated_rtmdet/_base_/default_runtime.py create mode 100644 configs/rotated_rtmdet/_base_/dota_rr.py create mode 100644 configs/rotated_rtmdet/_base_/schedule_3x.py delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py create mode 100644 mmrotate/models/losses/rd_iou_loss.py diff --git a/configs/rotated_rtmdet/_base_/default_runtime.py b/configs/rotated_rtmdet/_base_/default_runtime.py new file mode 100644 index 000000000..724ad4014 --- /dev/null +++ b/configs/rotated_rtmdet/_base_/default_runtime.py @@ -0,0 +1,33 @@ +default_scope = 'mmrotate' + +default_hooks = dict( + 
timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', interval=12, max_keep_ckpts=3), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='mmdet.DetVisualizationHook')) + +env_cfg = dict( + cudnn_benchmark=False, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) + +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='RotLocalVisualizer', vis_backends=vis_backends, name='visualizer') +log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) + +log_level = 'INFO' +load_from = None +resume = False + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] diff --git a/configs/rotated_rtmdet/_base_/dota_rr.py b/configs/rotated_rtmdet/_base_/dota_rr.py new file mode 100644 index 000000000..e1c9bf46b --- /dev/null +++ b/configs/rotated_rtmdet/_base_/dota_rr.py @@ -0,0 +1,104 @@ +# dataset settings +dataset_type = 'DOTADataset' +# data_root = '/home/wangchen/liuyanyi/datasets/dota_mmrotate_ss/' +data_root = '/datasets/dota_mmrotate_ss/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='RandomRotate', + prob=0.5, + angle_range=180, + rect_obj_labels=[9, 11]), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + # avoid bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), + img_shape=(1024, 1024), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='val/annfiles/', + data_prefix=dict(img_path='val/images/'), + img_shape=(1024, 1024), + 
test_mode=True, + pipeline=val_pipeline)) +# test_dataloader = val_dataloader + +val_evaluator = dict(type='DOTAMetric', metric='mAP') +# test_evaluator = val_evaluator + +# inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +test_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='test/images/'), + img_shape=(1024, 1024), + test_mode=True, + pipeline=test_pipeline)) +test_evaluator = dict( + type='DOTAMetric', + format_only=True, + merge_patches=True, + outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') diff --git a/configs/rotated_rtmdet/_base_/schedule_3x.py b/configs/rotated_rtmdet/_base_/schedule_3x.py new file mode 100644 index 000000000..30f850b3d --- /dev/null +++ b/configs/rotated_rtmdet/_base_/schedule_3x.py @@ -0,0 +1,34 @@ +max_epochs = 3 * 12 +base_lr = 0.004 / 16 +interval = 12 + +train_cfg = dict( + type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=interval) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py deleted file mode 100644 index e43377a27..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-hrsc.py +++ /dev/null @@ -1,208 +0,0 @@ -_base_ = [ - '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py', - '../_base_/datasets/hrsc.py' -] -model = dict( - type='mmdet.RTMDet', - data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', - mean=[103.53, 116.28, 123.675], - std=[57.375, 57.12, 58.395], - bgr_to_rgb=False, - boxtype2tensor=False, - batch_augments=None), - backbone=dict( - type='mmdet.CSPNeXt', - arch='P5', - expand_ratio=0.5, - deepen_factor=1, - widen_factor=1, - channel_attention=True, - norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), - neck=dict( - type='mmdet.CSPNeXtPAFPN', - in_channels=[256, 512, 1024], - out_channels=256, - num_csp_blocks=3, - expand_ratio=0.5, - norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), - bbox_head=dict( - type='RotatedRTMDetSepBNHead', - # type='RTMDetSepBNHead', - num_classes=1, - in_channels=256, - stacked_convs=2, - feat_channels=256, - anchor_generator=dict( - type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), - bbox_coder=dict(type='DistanceAnglePointCoder', angle_version='le90'), - loss_cls=dict( - type='mmdet.QualityFocalLoss', - use_sigmoid=True, - beta=2.0, - loss_weight=1.0), - loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), - with_objectness=False, - exp_on_reg=True, - share_conv=True, - pred_kernel_size=1, - use_hbbox_loss=False, - scale_angle=False, - # angle_coder=dict( - # _scope_='mmrotate', - # type='CSLCoder', - # angle_version='le90', - # omega=4, - # window='gaussian', - # 
radius=3), - # loss_angle=dict( - # _scope_='mmrotate', - # type='SmoothFocalLoss', - # gamma=2.0, - # alpha=0.25, - # loss_weight=0.8), - # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), - norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), - train_cfg=dict( - assigner=dict( - type='mmdet.DynamicSoftLabelAssigner', - iou_calculator=dict(type='RBboxOverlaps2D'), - topk=13), - allowed_border=-1, - pos_weight=-1, - debug=False), - test_cfg=dict( - nms_pre=2000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms_rotated', iou_threshold=0.1), - max_per_img=2000), -) - -train_pipeline = [ - dict( - type='mmdet.LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.1, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(800, 800), - ratio_range=(1.0, 1.0), - max_cached_images=20, - pad_val=(114, 114, 114)), - dict(type='mmdet.PackDetInputs') -] - -train_pipeline_stage2 = [ - dict( - type='mmdet.LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.1, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict(type='mmdet.PackDetInputs') -] - -test_pipeline = [ - dict( - type='mmdet.LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] - -train_dataloader = dict( - batch_size=8, - num_workers=8, - batch_sampler=None, - pin_memory=True, - dataset=dict(pipeline=train_pipeline)) -val_dataloader = dict( - batch_size=1, num_workers=1, dataset=dict(pipeline=test_pipeline)) -test_dataloader = val_dataloader - -max_epochs = 300 -stage2_num_epochs = 20 -base_lr = 0.004 / 4 -interval = 20 - -train_cfg = dict( - max_epochs=max_epochs, - val_interval=interval, - dynamic_intervals=[(max_epochs - stage2_num_epochs, 10)]) - -# optimizer -optim_wrapper = dict( - _delete_=True, - type='OptimWrapper', - optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), - paramwise_cfg=dict( - norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', - start_factor=1.0e-5, - by_epoch=False, - begin=0, - end=1000), - dict( - # use cosine lr from 150 to 300 epoch - type='CosineAnnealingLR', - eta_min=base_lr * 
0.05, - begin=max_epochs // 2, - end=max_epochs, - T_max=max_epochs // 2, - by_epoch=True, - convert_to_iter_based=True), -] - -# hooks -default_hooks = dict( - checkpoint=dict( - interval=interval, - max_keep_ckpts=3 # only keep latest 3 checkpoints - )) -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=max_epochs - stage2_num_epochs, - switch_pipeline=train_pipeline_stage2) -] diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py new file mode 100644 index 000000000..513494c00 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py @@ -0,0 +1,77 @@ +_base_ = [ + './_base_/default_runtime.py', './_base_/schedule_3x.py', + './_base_/dota_rr.py' +] +angle_version = 'le90' +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + # type='RTMDetSepBNHead', + num_classes=15, + in_channels=256, + stacked_convs=2, + feat_channels=256, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + # angle_coder=dict( + # type='DistributionAngleCoder', + # angle_version='le90'), + loss_angle=None, + # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py deleted file mode 100644 index a3b52616d..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_s-300e-hrsc.py +++ /dev/null @@ -1,70 +0,0 @@ -_base_ = './rotated_rtmdet_l-300e-hrsc.py' -checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa -model = dict( - backbone=dict( - deepen_factor=0.33, - widen_factor=0.5, - init_cfg=dict( - type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), - neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), - bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) - -train_pipeline = [ - dict( - 
type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), - dict( - type='RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(800, 800), - ratio_range=(1.0, 1.0), - max_cached_images=20, - pad_val=(114, 114, 114)), - dict(type='mmdet.PackDetInputs') -] - -train_pipeline_stage2 = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict(type='mmdet.PackDetInputs') -] - -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=280, - switch_pipeline=train_pipeline_stage2) -] \ No newline at end of file diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py deleted file mode 100644 index 1df2d99b3..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-hrsc.py +++ /dev/null @@ -1,50 +0,0 @@ -_base_ = './rotated_rtmdet_s-300e-hrsc.py' - -cocop = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa -checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa - -model = dict( - init_cfg=dict(type='Pretrained', checkpoint=cocop), - backbone=dict( - deepen_factor=0.167, - widen_factor=0.375, - # init_cfg=dict( - # type='Pretrained', prefix='backbone.', checkpoint=checkpoint) - ), - neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), - bbox_head=dict(in_channels=96, feat_channels=96, exp_on_reg=False)) - -train_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.CachedMosaic', - img_scale=(800, 800), - pad_val=114.0, - max_cached_images=20, - random_pop=False), - dict( - type='RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(800, 800), - 
ratio_range=(1.0, 1.0), - max_cached_images=10, - random_pop=False, - pad_val=(114, 114, 114), - prob=0.5), - dict(type='mmdet.PackDetInputs') -] - -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py new file mode 100644 index 000000000..77c81f587 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py @@ -0,0 +1,20 @@ +_base_ = './rotated_rtmdet_l-3x-dota.py' + +coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +# checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + # init_cfg=dict( + # type='Pretrained', prefix='backbone.', checkpoint=checkpoint) + ), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict( + in_channels=96, + feat_channels=96, + exp_on_reg=False, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + )) diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py b/mmrotate/models/dense_heads/rotated_rtmdet_head.py index e0f1fbdde..b7b7c1cf9 100644 --- a/mmrotate/models/dense_heads/rotated_rtmdet_head.py +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -1,33 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. import copy -from typing import Tuple, List, Optional +from typing import List, Optional, Tuple import torch -from mmcv.cnn import Scale, ConvModule, is_norm +from mmcv.cnn import ConvModule, Scale, is_norm from mmdet.models import inverse_sigmoid from mmdet.models.dense_heads import RTMDetHead from mmdet.models.task_modules import anchor_inside_flags -from mmdet.models.utils import sigmoid_geometric_mean, multi_apply, unmap, select_single_mlvl, filter_scores_and_topk -from mmdet.structures.bbox import distance2bbox, get_box_tensor, cat_boxes, bbox_cxcywh_to_xyxy -from mmdet.utils import ConfigType, OptConfigType, InstanceList, OptInstanceList, reduce_mean +from mmdet.models.utils import (filter_scores_and_topk, multi_apply, + select_single_mlvl, sigmoid_geometric_mean, + unmap) +from mmdet.structures.bbox import bbox_cxcywh_to_xyxy, cat_boxes, distance2bbox +from mmdet.utils import (ConfigType, InstanceList, OptConfigType, + OptInstanceList, reduce_mean) from mmengine import ConfigDict -from mmengine.model import normal_init, constant_init, bias_init_with_prob +from mmengine.model import bias_init_with_prob, constant_init, normal_init from mmengine.structures import InstanceData -from mmrotate.registry import MODELS, TASK_UTILS +from torch import Tensor, nn from mmrotate.models.task_modules.coders import DistanceAnglePointCoder -from mmrotate.structures import norm_angle, RotatedBoxes -from torch import nn, Tensor +from mmrotate.registry import MODELS, TASK_UTILS +from mmrotate.structures import RotatedBoxes, norm_angle # TODO move to mmrotate.structures.transform and update Coder -def distance2obb(points, - distance, - angle_version='oc'): +def distance2obb(points, distance, angle_version='oc'): distance, angle = distance.split([4, 1], dim=-1) cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = torch.cat([cos_angle, 
-sin_angle, sin_angle, cos_angle], + dim=-1) rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) wh = distance[..., :2] + distance[..., 2:] @@ -57,11 +60,13 @@ class RotatedRTMDetHead(RTMDetHead): def __init__(self, num_classes: int, in_channels: int, + angle_version: str = 'le90', use_hbbox_loss: bool = False, scale_angle: bool = True, angle_coder: ConfigType = dict(type='PseudoAngleCoder'), loss_angle: OptConfigType = None, **kwargs) -> None: + self.angle_version = angle_version self.use_hbbox_loss = use_hbbox_loss self.is_scale_angle = scale_angle self.angle_coder = TASK_UTILS.build(angle_coder) @@ -147,10 +152,10 @@ def forward(self, feats: Tuple[Tensor, ...]) -> tuple: angle_preds.append(angle_pred) return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) - def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: Tensor, - labels: Tensor, label_weights: Tensor, - bbox_targets: Tensor, assign_metrics: Tensor, - stride: List[int]): + def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, + angle_pred: Tensor, labels: Tensor, + label_weights: Tensor, bbox_targets: Tensor, + assign_metrics: Tensor, stride: List[int]): """Compute loss of a single scale level. Args: @@ -201,14 +206,16 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: pos_decode_bbox_pred = pos_bbox_pred pos_decode_bbox_targets = pos_bbox_targets if self.use_hbbox_loss: - pos_decode_bbox_targets = bbox_cxcywh_to_xyxy(pos_bbox_targets[:, :4]) + pos_decode_bbox_targets = bbox_cxcywh_to_xyxy( + pos_bbox_targets[:, :4]) # regression loss pos_bbox_weight = assign_metrics[pos_inds] loss_angle = angle_pred.sum() * 0 if self.loss_angle is not None: - angle_pred = angle_pred.reshape(-1, self.angle_coder.encode_size) + angle_pred = angle_pred.reshape(-1, + self.angle_coder.encode_size) pos_angle_pred = angle_pred[pos_inds] pos_angle_target = pos_bbox_targets[:, 4:5] pos_angle_target = self.angle_coder.encode(pos_angle_target) @@ -229,7 +236,8 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, angle_pred: pos_bbox_weight = bbox_targets.new_tensor(0.) 
loss_angle = angle_pred.sum() * 0 - return loss_cls, loss_bbox, loss_angle, assign_metrics.sum(), pos_bbox_weight.sum(), pos_bbox_weight.sum() + return loss_cls, loss_bbox, loss_angle, assign_metrics.sum( + ), pos_bbox_weight.sum(), pos_bbox_weight.sum() def loss_by_feat(self, cls_scores: List[Tensor], @@ -275,10 +283,12 @@ def loss_by_feat(self, decoded_bboxes = [] decoded_hbboxes = [] angle_preds_list = [] - for anchor, bbox_pred, angle_pred in zip(anchor_list[0], bbox_preds, angle_preds): + for anchor, bbox_pred, angle_pred in zip(anchor_list[0], bbox_preds, + angle_preds): anchor = anchor.reshape(-1, 4) bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) - angle_pred = angle_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.angle_coder.encode_size) + angle_pred = angle_pred.permute(0, 2, 3, 1).reshape( + num_imgs, -1, self.angle_coder.encode_size) if self.use_hbbox_loss: hbbox_pred = distance2bbox(anchor, bbox_pred) @@ -287,15 +297,8 @@ def loss_by_feat(self, decoded_angle = self.angle_coder.decode(angle_pred, keepdim=True) bbox_pred = torch.cat([bbox_pred, decoded_angle], dim=-1) - # res = distance2obb(anchor, bbox_pred, 'le90').reshape(-1, 5) - # - # c = DistanceAnglePointCoder(angle_version='le90') - # res2 = c.decode(anchor.repeat(8, 1)[:, :2], bbox_pred.reshape(-1, 5)) - # - # print((res == res2).unique()) - - # TODO add arg angle_version - bbox_pred = distance2obb(anchor, bbox_pred, angle_version='le90') + bbox_pred = distance2obb( + anchor, bbox_pred, angle_version=self.angle_version) decoded_bboxes.append(bbox_pred) angle_preds_list.append(angle_pred) @@ -316,17 +319,12 @@ def loss_by_feat(self, if self.use_hbbox_loss: decoded_bboxes = decoded_hbboxes - losses_cls, losses_bbox, losses_angle, \ - cls_avg_factors, bbox_avg_factors, angle_avg_factors = multi_apply( - self.loss_by_feat_single, - cls_scores, - decoded_bboxes, - angle_preds_list, - labels_list, - label_weights_list, - bbox_targets_list, - assign_metrics_list, - self.prior_generator.strides) + (losses_cls, losses_bbox, losses_angle, cls_avg_factors, + bbox_avg_factors, angle_avg_factors) = multi_apply( + self.loss_by_feat_single, cls_scores, decoded_bboxes, + angle_preds_list, labels_list, label_weights_list, + bbox_targets_list, assign_metrics_list, + self.prior_generator.strides) cls_avg_factor = reduce_mean(sum(cls_avg_factors)).clamp_(min=1).item() losses_cls = list(map(lambda x: x / cls_avg_factor, losses_cls)) @@ -337,8 +335,12 @@ def loss_by_feat(self, if self.loss_angle is not None: angle_avg_factors = reduce_mean( sum(angle_avg_factors)).clamp_(min=1).item() - losses_angle = list(map(lambda x: x / angle_avg_factors, losses_angle)) - return dict(loss_cls=losses_cls, loss_bbox=losses_bbox, loss_angle=losses_angle) + losses_angle = list( + map(lambda x: x / angle_avg_factors, losses_angle)) + return dict( + loss_cls=losses_cls, + loss_bbox=losses_bbox, + loss_angle=losses_angle) else: return dict(loss_cls=losses_cls, loss_bbox=losses_bbox) @@ -390,7 +392,7 @@ def _get_targets_single(self, img_meta['img_shape'][:2], self.train_cfg['allowed_border']) if not inside_flags.any(): - return (None,) * 7 + return (None, ) * 7 # assign gt and sample anchors anchors = flat_anchors[inside_flags, :] @@ -407,7 +409,7 @@ def _get_targets_single(self, num_valid_anchors = anchors.shape[0] bbox_targets = anchors.new_zeros((*anchors.size()[:-1], 5)) - labels = anchors.new_full((num_valid_anchors,), + labels = anchors.new_full((num_valid_anchors, ), self.num_classes, dtype=torch.long) label_weights = 
anchors.new_zeros(num_valid_anchors, dtype=torch.float) @@ -420,7 +422,8 @@ def _get_targets_single(self, # point-based pos_bbox_targets = sampling_result.pos_gt_bboxes # TODO add arg angle_version - pos_bbox_targets = pos_bbox_targets.regularize_boxes('le90') + pos_bbox_targets = pos_bbox_targets.regularize_boxes( + self.angle_version) bbox_targets[pos_inds, :] = pos_bbox_targets labels[pos_inds] = sampling_result.pos_gt_labels @@ -716,7 +719,8 @@ def __init__(self, **kwargs) -> None: self.share_conv = share_conv self.exp_on_reg = exp_on_reg - assert scale_angle == False, 'scale_angle does not support in RotatedRTMDetSepBNHead' + assert scale_angle is False, \ + 'scale_angle does not support in RotatedRTMDetSepBNHead' super().__init__( num_classes, in_channels, @@ -804,7 +808,8 @@ def init_weights(self) -> None: if is_norm(m): constant_init(m, 1) bias_cls = bias_init_with_prob(0.01) - for rtm_cls, rtm_reg, rtm_ang in zip(self.rtm_cls, self.rtm_reg, self.rtm_ang): + for rtm_cls, rtm_reg, rtm_ang in zip(self.rtm_cls, self.rtm_reg, + self.rtm_ang): normal_init(rtm_cls, std=0.01, bias=bias_cls) normal_init(rtm_reg, std=0.01) normal_init(rtm_ang, std=0.01) @@ -863,44 +868,43 @@ def forward(self, feats: Tuple[Tensor, ...]) -> tuple: if __name__ == '__main__': - points = torch.tensor([[0., 0., 8., 8.], - [8., 0., 8., 8.], - [16., 0., 8., 8.], - [24., 0., 8., 8.]], device='cuda:0') - - distances = torch.tensor( - [[[7.4215, 7.8629, 7.4568, 8.1447, -0.0224], - [7.3209, 7.7807, 7.4076, 8.1743, -0.0194], - [7.2929, 7.7480, 7.3624, 8.1829, -0.0228], - [7.3291, 7.7770, 7.3966, 8.2230, -0.0215]], - [[7.5713, 7.8529, 7.5189, 8.1220, -0.0212], - [7.6215, 7.8978, 7.5875, 8.2110, -0.0304], - [7.6464, 7.8849, 7.4770, 8.0470, -0.0372], - [7.5255, 7.8110, 7.4478, 8.1207, -0.0282]], - [[7.4363, 7.8605, 7.4451, 8.1569, -0.0239], - [7.3455, 7.7839, 7.4027, 8.1784, -0.0184], - [7.3022, 7.7584, 7.3663, 8.1751, -0.0232], - [7.3419, 7.7717, 7.3903, 8.2077, -0.0242]], - [[7.4416, 7.8393, 7.4837, 8.1546, -0.0213], - [7.3217, 7.7790, 7.3956, 8.1805, -0.0154], - [7.3080, 7.7377, 7.3658, 8.1779, -0.0205], - [7.3390, 7.7580, 7.4031, 8.2245, -0.0205]], - [[7.7741, 7.8088, 7.7546, 8.3303, -0.0151], - [7.7897, 7.9653, 7.9556, 8.3727, -0.0266], - [8.2531, 8.2622, 8.4759, 8.1064, -0.0506], - [8.1185, 7.9733, 8.1983, 8.3481, -0.0243]], - [[7.4850, 7.8193, 7.4702, 8.1592, -0.0248], - [7.3636, 7.7511, 7.4078, 8.1570, -0.0204], - [7.3184, 7.7122, 7.3942, 8.1611, -0.0243], - [7.3617, 7.7788, 7.4001, 8.2358, -0.0256]], - [[7.4395, 7.8696, 7.4530, 8.1517, -0.0246], - [7.3550, 7.7790, 7.4124, 8.1524, -0.0221], - [7.3377, 7.7614, 7.3752, 8.1712, -0.0226], - [7.3411, 7.7603, 7.4000, 8.2265, -0.0246]], - [[7.4344, 7.8780, 7.4388, 8.1534, -0.0247], - [7.3309, 7.8050, 7.3925, 8.1647, -0.0202], - [7.2904, 7.7461, 7.3651, 8.1827, -0.0221], - [7.3486, 7.7741, 7.4090, 8.2144, -0.0230]]], device='cuda:0') + points = torch.tensor([[0., 0., 8., 8.], [8., 0., 8., 8.], + [16., 0., 8., 8.], [24., 0., 8., 8.]], + device='cuda:0') + + distances = torch.tensor([[[7.4215, 7.8629, 7.4568, 8.1447, -0.0224], + [7.3209, 7.7807, 7.4076, 8.1743, -0.0194], + [7.2929, 7.7480, 7.3624, 8.1829, -0.0228], + [7.3291, 7.7770, 7.3966, 8.2230, -0.0215]], + [[7.5713, 7.8529, 7.5189, 8.1220, -0.0212], + [7.6215, 7.8978, 7.5875, 8.2110, -0.0304], + [7.6464, 7.8849, 7.4770, 8.0470, -0.0372], + [7.5255, 7.8110, 7.4478, 8.1207, -0.0282]], + [[7.4363, 7.8605, 7.4451, 8.1569, -0.0239], + [7.3455, 7.7839, 7.4027, 8.1784, -0.0184], + [7.3022, 7.7584, 7.3663, 8.1751, -0.0232], + 
[7.3419, 7.7717, 7.3903, 8.2077, -0.0242]], + [[7.4416, 7.8393, 7.4837, 8.1546, -0.0213], + [7.3217, 7.7790, 7.3956, 8.1805, -0.0154], + [7.3080, 7.7377, 7.3658, 8.1779, -0.0205], + [7.3390, 7.7580, 7.4031, 8.2245, -0.0205]], + [[7.7741, 7.8088, 7.7546, 8.3303, -0.0151], + [7.7897, 7.9653, 7.9556, 8.3727, -0.0266], + [8.2531, 8.2622, 8.4759, 8.1064, -0.0506], + [8.1185, 7.9733, 8.1983, 8.3481, -0.0243]], + [[7.4850, 7.8193, 7.4702, 8.1592, -0.0248], + [7.3636, 7.7511, 7.4078, 8.1570, -0.0204], + [7.3184, 7.7122, 7.3942, 8.1611, -0.0243], + [7.3617, 7.7788, 7.4001, 8.2358, -0.0256]], + [[7.4395, 7.8696, 7.4530, 8.1517, -0.0246], + [7.3550, 7.7790, 7.4124, 8.1524, -0.0221], + [7.3377, 7.7614, 7.3752, 8.1712, -0.0226], + [7.3411, 7.7603, 7.4000, 8.2265, -0.0246]], + [[7.4344, 7.8780, 7.4388, 8.1534, -0.0247], + [7.3309, 7.8050, 7.3925, 8.1647, -0.0202], + [7.2904, 7.7461, 7.3651, 8.1827, -0.0221], + [7.3486, 7.7741, 7.4090, 8.2144, -0.0230]]], + device='cuda:0') res = distance2obb(points, distances, 'le90').reshape(-1, 5) diff --git a/mmrotate/models/losses/__init__.py b/mmrotate/models/losses/__init__.py index 216543420..644a4db37 100644 --- a/mmrotate/models/losses/__init__.py +++ b/mmrotate/models/losses/__init__.py @@ -3,11 +3,12 @@ from .gaussian_dist_loss import GDLoss from .gaussian_dist_loss_v1 import GDLoss_v1 from .kf_iou_loss import KFLoss +from .rd_iou_loss import RDIoULoss from .rotated_iou_loss import RotatedIoULoss from .smooth_focal_loss import SmoothFocalLoss from .spatial_border_loss import SpatialBorderLoss __all__ = [ 'GDLoss', 'GDLoss_v1', 'KFLoss', 'ConvexGIoULoss', 'BCConvexGIoULoss', - 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss' + 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss', 'RDIoULoss' ] diff --git a/mmrotate/models/losses/rd_iou_loss.py b/mmrotate/models/losses/rd_iou_loss.py new file mode 100644 index 000000000..6d971aa00 --- /dev/null +++ b/mmrotate/models/losses/rd_iou_loss.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import numpy as np +import torch +import torch.nn as nn +from mmdet.models.losses.utils import weighted_loss + +from mmrotate.registry import MODELS +from mmrotate.structures import norm_angle + + +@weighted_loss +def rd_iou_loss(pred, target, linear=False, mode='log', eps=1e-6): + """Rotated IoU loss. + + Computing the IoU loss between a set of predicted rbboxes and target + rbboxes. + The loss is calculated as negative log of IoU. + + Args: + pred (torch.Tensor): Predicted bboxes of format (x, y, h, w, angle), + shape (n, 5). + target (torch.Tensor): Corresponding gt bboxes, shape (n, 5). + linear (bool, optional): If True, use linear scale of loss instead of + log scale. Default: False. + mode (str): Loss scaling mode, including "linear", "square", and "log". + Default: 'log' + eps (float): Eps to avoid log(0). + Return: + torch.Tensor: Loss tensor. 
+ """ + assert mode in ['linear', 'square', 'log'] + if linear: + mode = 'linear' + warnings.warn( + 'DeprecationWarning: Setting "linear=True" in ' + 'poly_iou_loss is deprecated, please use "mode=`linear`" ' + 'instead.') + + pred_x, pred_y, pred_w, pred_h, pred_t = pred.split([1, 1, 1, 1, 1], + dim=-1) + target_x, target_y, target_w, target_h, target_t = target.split( + [1, 1, 1, 1, 1], dim=-1) + + target_z = torch.zeros_like(target_t) + target_l = torch.ones_like(target_t) * 0.5 * np.pi + + pred_z = torch.ones_like(pred_t) * norm_angle(pred_t - target_t, 'le90') + pred_l = torch.ones_like(pred_t) * 0.5 * np.pi + + area_pred = pred_w * pred_h * pred_l + area_target = target_w * target_h * target_l + + union = ( + f(pred_x, target_x, pred_w, target_w) * + f(pred_y, target_y, pred_h, target_h) * + f(pred_z, target_z, pred_l, target_l)) + + ious = union / (area_pred + area_target - union) + + enclose_area = ( + f2(pred_x, target_x, pred_w, target_w) * + f2(pred_y, target_y, pred_h, target_h) * + f2(pred_z, target_z, pred_l, target_l)) + + gious = ious - (enclose_area - union) / enclose_area + + # ious = ious.squeeze(0).clamp(min=eps) + + loss = 1 - gious.squeeze(-1) + + return loss + + +def f(x1, x2, w1, w2): + ff = torch.min(x1 + 0.5 * w1, x2 + 0.5 * w2) - torch.max( + x1 - 0.5 * w1, x2 - 0.5 * w2) + return ff.clamp(min=0) + + +def f2(x1, x2, w1, w2): + ff = torch.max(x1 + 0.5 * w1, x2 + 0.5 * w2) - torch.min( + x1 - 0.5 * w1, x2 - 0.5 * w2) + return ff.clamp(min=0) + + +@MODELS.register_module() +class RDIoULoss(nn.Module): + """RotatedIoULoss. + + Computing the IoU loss between a set of predicted rbboxes and + target rbboxes. + Args: + linear (bool): If True, use linear scale of loss else determined + by mode. Default: False. + eps (float): Eps to avoid log(0). + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Weight of loss. + mode (str): Loss scaling mode, including "linear", "square", and "log". + Default: 'log' + """ + + def __init__(self, + linear=False, + eps=1e-6, + reduction='mean', + loss_weight=1.0, + mode='log'): + super(RDIoULoss, self).__init__() + assert mode in ['linear', 'square', 'log'] + if linear: + mode = 'linear' + warnings.warn('DeprecationWarning: Setting "linear=True" in ' + 'IOULoss is deprecated, please use "mode=`linear`" ' + 'instead.') + self.mode = mode + self.linear = linear + self.eps = eps + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. Options are "none", "mean" and "sum". 
+ """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if (weight is not None) and (not torch.any(weight > 0)) and ( + reduction != 'none'): + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 + if weight is not None and weight.dim() > 1: + # TODO: remove this in the future + # reduce the weight of shape (n, 5) to (n,) to match the + # iou_loss of shape (n,) + assert weight.shape == pred.shape + weight = weight.mean(-1) + loss = self.loss_weight * rd_iou_loss( + pred, + target, + weight, + mode=self.mode, + eps=self.eps, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss From c50b6be95c34a5a8b7337e6fa2ad5e89b1fa0e0c Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Tue, 8 Nov 2022 19:16:08 +0800 Subject: [PATCH 17/52] clean --- mmrotate/models/dense_heads/__init__.py | 2 +- mmrotate/models/task_modules/coders/angle_coder.py | 4 +++- .../models/task_modules/coders/distance_angle_point_coder.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mmrotate/models/dense_heads/__init__.py b/mmrotate/models/dense_heads/__init__.py index 311aa2c83..4fcf22a32 100644 --- a/mmrotate/models/dense_heads/__init__.py +++ b/mmrotate/models/dense_heads/__init__.py @@ -8,9 +8,9 @@ from .rotated_fcos_head import RotatedFCOSHead from .rotated_reppoints_head import RotatedRepPointsHead from .rotated_retina_head import RotatedRetinaHead +from .rotated_rtmdet_head import RotatedRTMDetHead, RotatedRTMDetSepBNHead from .s2a_head import S2AHead, S2ARefineHead from .sam_reppoints_head import SAMRepPointsHead -from .rotated_rtmdet_head import RotatedRTMDetHead, RotatedRTMDetSepBNHead __all__ = [ 'RotatedRetinaHead', 'OrientedRPNHead', 'RotatedRepPointsHead', diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 8db58fa6a..a236e50d2 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -178,4 +178,6 @@ def decode(self, angle: Tensor, keepdim: bool = False) -> Tensor: decode_angle = decode_angle.reshape(*angle.shape[:-1], 1) else: decode_angle = decode_angle.reshape(-1) - return self.angle_range * decode_angle / self.reg_max - self.angle_offset + decode_angle = self.angle_offset * decode_angle / self.reg_max + decode_angle = decode_angle - self.angle_offset + return decode_angle diff --git a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py index 0b7d3f9ec..b92647088 100644 --- a/mmrotate/models/task_modules/coders/distance_angle_point_coder.py +++ b/mmrotate/models/task_modules/coders/distance_angle_point_coder.py @@ -99,7 +99,8 @@ def distance2obb(self, cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], dim=-1) + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], + dim=-1) rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) wh = distance[..., :2] + distance[..., 2:] From 55df158f74aa07104a6dd1e279a344f16e3554f4 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Wed, 9 Nov 2022 15:43:26 +0800 Subject: [PATCH 18/52] add probiou --- mmrotate/models/losses/__init__.py | 4 +- mmrotate/models/losses/prob_iou_loss.py | 136 ++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 
mmrotate/models/losses/prob_iou_loss.py diff --git a/mmrotate/models/losses/__init__.py b/mmrotate/models/losses/__init__.py index 644a4db37..a49cdd0ff 100644 --- a/mmrotate/models/losses/__init__.py +++ b/mmrotate/models/losses/__init__.py @@ -3,6 +3,7 @@ from .gaussian_dist_loss import GDLoss from .gaussian_dist_loss_v1 import GDLoss_v1 from .kf_iou_loss import KFLoss +from .prob_iou_loss import ProbIoULoss from .rd_iou_loss import RDIoULoss from .rotated_iou_loss import RotatedIoULoss from .smooth_focal_loss import SmoothFocalLoss @@ -10,5 +11,6 @@ __all__ = [ 'GDLoss', 'GDLoss_v1', 'KFLoss', 'ConvexGIoULoss', 'BCConvexGIoULoss', - 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss', 'RDIoULoss' + 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss', 'RDIoULoss', + 'ProbIoULoss' ] diff --git a/mmrotate/models/losses/prob_iou_loss.py b/mmrotate/models/losses/prob_iou_loss.py new file mode 100644 index 000000000..d8acc0eef --- /dev/null +++ b/mmrotate/models/losses/prob_iou_loss.py @@ -0,0 +1,136 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmdet.models import weighted_loss +from torch import nn + +from mmrotate.registry import MODELS + + +def gbb_form(boxes): + return torch.cat( + (boxes[:, :2], torch.pow(boxes[:, 2:4], 2) / 12, boxes[:, 4:]), 1) + + +def rotated_form(a_, b_, angles): + a = a_ * torch.pow(torch.cos(angles), 2.) + b_ * torch.pow( + torch.sin(angles), 2.) + b = a_ * torch.pow(torch.sin(angles), 2.) + b_ * torch.pow( + torch.cos(angles), 2.) + c = a_ * torch.cos(angles) * torch.sin(angles) - b_ * torch.sin( + angles) * torch.cos(angles) + return a, b, c + + +@weighted_loss +def probiou_loss(pred, target, eps=1e-3, mode='l1'): + """pred -> a matrix [N,5](x,y,w,h,angle) containing ours predicted box + target -> a matrix [N,5](x,y,w,h,angle) containing ours target box eps. + + -> threshold to avoid infinite values mode -> ('l1' in [0,1] or 'l2' in + [0,inf]) metrics according our paper. + """ + + gbboxes1 = gbb_form(pred) + gbboxes2 = gbb_form(target) + + (x1, y1, a1_, b1_, c1_) = (gbboxes1[:, 0], gbboxes1[:, 1], gbboxes1[:, 2], + gbboxes1[:, 3], gbboxes1[:, 4]) + (x2, y2, a2_, b2_, c2_) = (gbboxes2[:, 0], gbboxes2[:, 1], gbboxes2[:, 2], + gbboxes2[:, 3], gbboxes2[:, 4]) + + a1, b1, c1 = rotated_form(a1_, b1_, c1_) + a2, b2, c2 = rotated_form(a2_, b2_, c2_) + + t1 = (((a1 + a2) * (torch.pow(y1 - y2, 2)) + (b1 + b2) * + (torch.pow(x1 - x2, 2))) / ((a1 + a2) * (b1 + b2) - + (torch.pow(c1 + c2, 2)) + eps)) * 0.25 + t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / + ((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2)) + eps)) * 0.5 + t3 = torch.log(((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2))) / + (4 * torch.sqrt((a1 * b1 - torch.pow(c1, 2)) * + (a2 * b2 - torch.pow(c2, 2))) + eps) + + eps) * 0.5 + + B_d = t1 + t2 + t3 + + B_d = torch.clamp(B_d, eps, 100.0) + l1 = torch.sqrt(1.0 - torch.exp(-B_d) + eps) + l_i = torch.pow(l1, 2.0) + l2 = -torch.log(1.0 - l_i + eps) + + if mode == 'l1': + probiou = l1 + if mode == 'l2': + probiou = l2 + + return probiou + + +@MODELS.register_module() +class ProbIoULoss(nn.Module): + """RotatedIoULoss. + + Computing the IoU loss between a set of predicted rbboxes and + target rbboxes. + Args: + linear (bool): If True, use linear scale of loss else determined + by mode. Default: False. + eps (float): Eps to avoid log(0). + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Weight of loss. + mode (str): Loss scaling mode, including "linear", "square", and "log". 
+ Default: 'log' + """ + + def __init__(self, mode='l1', eps=1e-6, reduction='mean', loss_weight=1.0): + super(ProbIoULoss, self).__init__() + + self.mode = mode + self.eps = eps + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. Options are "none", "mean" and "sum". + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if (weight is not None) and (not torch.any(weight > 0)) and ( + reduction != 'none'): + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 + if weight is not None and weight.dim() > 1: + # TODO: remove this in the future + # reduce the weight of shape (n, 5) to (n,) to match the + # iou_loss of shape (n,) + assert weight.shape == pred.shape + weight = weight.mean(-1) + loss = self.loss_weight * probiou_loss( + pred, + target, + weight, + mode=self.mode, + eps=self.eps, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss From cb670d796639aa72f500a4d082bf5dc8fd58940e Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Wed, 9 Nov 2022 15:44:25 +0800 Subject: [PATCH 19/52] add hrsc cfg --- configs/rotated_rtmdet/_base_/hrsc_rr.py | 76 ++++++++++++++++++ .../rotated_rtmdet_l-3x-hrsc.py | 77 +++++++++++++++++++ .../rotated_rtmdet_m-3x-hrsc.py | 7 ++ .../rotated_rtmdet_s-3x-hrsc.py | 12 +++ .../rotated_rtmdet_tiny-3x-hrsc.py | 19 +++++ .../rotated_rtmdet_x-3x-hrsc.py | 8 ++ 6 files changed, 199 insertions(+) create mode 100644 configs/rotated_rtmdet/_base_/hrsc_rr.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py diff --git a/configs/rotated_rtmdet/_base_/hrsc_rr.py b/configs/rotated_rtmdet/_base_/hrsc_rr.py new file mode 100644 index 000000000..d2c848aa2 --- /dev/null +++ b/configs/rotated_rtmdet/_base_/hrsc_rr.py @@ -0,0 +1,76 @@ +# dataset settings +dataset_type = 'HRSCDataset' +data_root = '/home/wangchen/liuyanyi/datasets/hrsc/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + 
dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + # avoid bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/trainval.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/test.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + test_mode=True, + pipeline=val_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='DOTAMetric', + iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], + metric='mAP') +test_evaluator = val_evaluator diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py new file mode 100644 index 000000000..01faa26f1 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py @@ -0,0 +1,77 @@ +_base_ = [ + './_base_/default_runtime.py', './_base_/schedule_3x.py', + './_base_/hrsc_rr.py' +] +angle_version = 'le90' +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + # type='RTMDetSepBNHead', + num_classes=15, + in_channels=256, + stacked_convs=2, + feat_channels=256, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + # angle_coder=dict( + # type='DistributionAngleCoder', + # angle_version='le90'), + loss_angle=None, 
+ # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py new file mode 100644 index 000000000..1225f5119 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py @@ -0,0 +1,7 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +model = dict( + backbone=dict(deepen_factor=0.67, widen_factor=0.75), + neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2), + bbox_head=dict(in_channels=192, feat_channels=192)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py new file mode 100644 index 000000000..8e9604a23 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py @@ -0,0 +1,12 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa +model = dict( + backbone=dict( + deepen_factor=0.33, + widen_factor=0.5, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), + bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py new file mode 100644 index 000000000..5238867a8 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py @@ -0,0 +1,19 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + # init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict( + in_channels=96, + feat_channels=96, + exp_on_reg=False, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + )) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py new file mode 100644 index 000000000..a3539c096 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py @@ -0,0 +1,8 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +model = dict( + 
backbone=dict(deepen_factor=1.33, widen_factor=1.25), + neck=dict( + in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4), + bbox_head=dict(in_channels=320, feat_channels=320)) From 27634442b96d0a498184a3049e93f87ea498aa61 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 10 Nov 2022 22:12:17 +0800 Subject: [PATCH 20/52] fix GDLoss with empty input --- mmrotate/models/losses/gaussian_dist_loss.py | 4 +++- mmrotate/models/losses/gaussian_dist_loss_v1.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mmrotate/models/losses/gaussian_dist_loss.py b/mmrotate/models/losses/gaussian_dist_loss.py index 782441212..3e64b1724 100644 --- a/mmrotate/models/losses/gaussian_dist_loss.py +++ b/mmrotate/models/losses/gaussian_dist_loss.py @@ -386,7 +386,9 @@ def forward(self, reduction_override if reduction_override else self.reduction) if (weight is not None) and (not torch.any(weight > 0)) and ( reduction != 'none'): - return (pred * weight).sum() + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 if weight is not None and weight.dim() > 1: assert weight.shape == pred.shape weight = weight.mean(-1) diff --git a/mmrotate/models/losses/gaussian_dist_loss_v1.py b/mmrotate/models/losses/gaussian_dist_loss_v1.py index 1685ae89c..09f6f4a6a 100644 --- a/mmrotate/models/losses/gaussian_dist_loss_v1.py +++ b/mmrotate/models/losses/gaussian_dist_loss_v1.py @@ -213,7 +213,9 @@ def forward(self, reduction_override if reduction_override else self.reduction) if (weight is not None) and (not torch.any(weight > 0)) and ( reduction != 'none'): - return (pred * weight).sum() + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 if weight is not None and weight.dim() > 1: assert weight.shape == pred.shape weight = weight.mean(-1) From ec74f7f3aa471fb4ef315e0f0be45d4e987ddd73 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 10 Nov 2022 22:13:01 +0800 Subject: [PATCH 21/52] add probiou cfg --- .../rotated_rtmdet_tiny_probiou-3x-hrsc.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py new file mode 100644 index 000000000..2b4bf41ab --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py @@ -0,0 +1,19 @@ +_base_ = './rotated_rtmdet_l-3x-hrsc.py' + +# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + # init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict( + in_channels=96, + feat_channels=96, + exp_on_reg=False, + loss_bbox=dict(type='ProbIoULoss', mode='l1', loss_weight=2.0), + )) From 632f49cf1af95c5d97b95dcaae06a87950a6db2f Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Tue, 15 Nov 2022 14:17:00 +0800 Subject: [PATCH 22/52] add mixup mosaic cfg --- .../rotated_rtmdet/_base_/default_runtime.py | 11 +- .../_base_/dota_mixup_mosaic.py | 158 
++++++++++++++++++ configs/rotated_rtmdet/_base_/dota_rr.py | 9 + .../_base_/hrsc_mosaic_mixup.py | 132 +++++++++++++++ configs/rotated_rtmdet/_base_/hrsc_rr.py | 9 + .../rotated_rtmdet/_base_/schedule_100e.py | 34 ++++ .../models/dense_heads/rotated_rtmdet_head.py | 10 +- 7 files changed, 350 insertions(+), 13 deletions(-) create mode 100644 configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py create mode 100644 configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py create mode 100644 configs/rotated_rtmdet/_base_/schedule_100e.py diff --git a/configs/rotated_rtmdet/_base_/default_runtime.py b/configs/rotated_rtmdet/_base_/default_runtime.py index 724ad4014..0a91907ef 100644 --- a/configs/rotated_rtmdet/_base_/default_runtime.py +++ b/configs/rotated_rtmdet/_base_/default_runtime.py @@ -4,7 +4,7 @@ timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=12, max_keep_ckpts=3), + checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='mmdet.DetVisualizationHook')) @@ -22,12 +22,3 @@ log_level = 'INFO' load_from = None resume = False - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49) -] diff --git a/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py b/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py new file mode 100644 index 000000000..998a45f57 --- /dev/null +++ b/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py @@ -0,0 +1,158 @@ +# dataset settings +dataset_type = 'DOTADataset' +# data_root = '/home/wangchen/liuyanyi/datasets/dota_mmrotate_ss/' +data_root = '/datasets/dota_mmrotate_ss/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.CachedMosaic', + img_scale=(1024, 1024), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(2048, 2048), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(1024, 1024), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1024, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + 
type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + # avoid bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), + img_shape=(1024, 1024), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='val/annfiles/', + data_prefix=dict(img_path='val/images/'), + img_shape=(1024, 1024), + test_mode=True, + pipeline=val_pipeline)) +# test_dataloader = val_dataloader + +val_evaluator = dict(type='DOTAMetric', metric='mAP') +# test_evaluator = val_evaluator + +# inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+test_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='test/images/'), + img_shape=(1024, 1024), + test_mode=True, + pipeline=test_pipeline)) +test_evaluator = dict( + type='DOTAMetric', + format_only=True, + merge_patches=True, + outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=30, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/_base_/dota_rr.py b/configs/rotated_rtmdet/_base_/dota_rr.py index e1c9bf46b..96bd7ac6d 100644 --- a/configs/rotated_rtmdet/_base_/dota_rr.py +++ b/configs/rotated_rtmdet/_base_/dota_rr.py @@ -102,3 +102,12 @@ format_only=True, merge_patches=True, outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] diff --git a/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py b/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py new file mode 100644 index 000000000..519c9c1dd --- /dev/null +++ b/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py @@ -0,0 +1,132 @@ +# dataset settings +dataset_type = 'HRSCDataset' +data_root = '/home/wangchen/liuyanyi/datasets/hrsc/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.CachedMosaic', + img_scale=(800, 800), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(800, 800), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + # avoid 
bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=True, + dataset=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/trainval.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline))) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='ImageSets/test.txt', + data_prefix=dict(sub_data_root='FullDataSet/'), + test_mode=True, + pipeline=val_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='DOTAMetric', + iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], + metric='mAP') +test_evaluator = val_evaluator + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=90, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/_base_/hrsc_rr.py b/configs/rotated_rtmdet/_base_/hrsc_rr.py index d2c848aa2..51e0b7b73 100644 --- a/configs/rotated_rtmdet/_base_/hrsc_rr.py +++ b/configs/rotated_rtmdet/_base_/hrsc_rr.py @@ -74,3 +74,12 @@ iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], metric='mAP') test_evaluator = val_evaluator + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] diff --git a/configs/rotated_rtmdet/_base_/schedule_100e.py b/configs/rotated_rtmdet/_base_/schedule_100e.py new file mode 100644 index 000000000..c0ca01a0d --- /dev/null +++ b/configs/rotated_rtmdet/_base_/schedule_100e.py @@ -0,0 +1,34 @@ +max_epochs = 100 +base_lr = 0.004 / 16 +interval = 20 + +train_cfg = dict( + type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=interval) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py 
b/mmrotate/models/dense_heads/rotated_rtmdet_head.py index b7b7c1cf9..f89cfb920 100644 --- a/mmrotate/models/dense_heads/rotated_rtmdet_head.py +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -219,10 +219,14 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, pos_angle_pred = angle_pred[pos_inds] pos_angle_target = pos_bbox_targets[:, 4:5] pos_angle_target = self.angle_coder.encode(pos_angle_target) + if pos_angle_target.dim() == 2: + pos_angle_weight = pos_bbox_weight.unsqueeze(-1) + else: + pos_angle_weight = pos_bbox_weight loss_angle = self.loss_angle( pos_angle_pred, pos_angle_target, - weight=pos_bbox_weight, + weight=pos_angle_weight, avg_factor=1.0) loss_bbox = self.loss_bbox( @@ -236,8 +240,8 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, pos_bbox_weight = bbox_targets.new_tensor(0.) loss_angle = angle_pred.sum() * 0 - return loss_cls, loss_bbox, loss_angle, assign_metrics.sum( - ), pos_bbox_weight.sum(), pos_bbox_weight.sum() + return (loss_cls, loss_bbox, loss_angle, assign_metrics.sum(), + pos_bbox_weight.sum(), pos_bbox_weight.sum()) def loss_by_feat(self, cls_scores: List[Tensor], From 5bf4e4a11e44f497b8bee95af0cf7b450517e960 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Sun, 20 Nov 2022 11:07:04 +0800 Subject: [PATCH 23/52] fix mmdeploy vis error --- mmrotate/visualization/local_visualizer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mmrotate/visualization/local_visualizer.py b/mmrotate/visualization/local_visualizer.py index 3d54e9fce..ea28c6383 100644 --- a/mmrotate/visualization/local_visualizer.py +++ b/mmrotate/visualization/local_visualizer.py @@ -81,8 +81,9 @@ def _draw_instances(self, image: np.ndarray, instances: ['InstanceData'], 'or (n, 8), but get `bboxes` with shape being ' f'{bboxes.shape}.') + bboxes = bboxes.cpu() polygons = bboxes.convert_to('qbox').tensor - polygons = polygons.reshape(-1, 4, 2).numpy() + polygons = polygons.reshape(-1, 4, 2) polygons = [p for p in polygons] self.draw_polygons( polygons, From df09227be43a90a390d34bb2cf39bbfddd0ff968 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Sun, 20 Nov 2022 15:34:09 +0800 Subject: [PATCH 24/52] add function_rewriter for deploy --- mmrotate/deploy/rotated_rtmdet_head.py | 119 +++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 mmrotate/deploy/rotated_rtmdet_head.py diff --git a/mmrotate/deploy/rotated_rtmdet_head.py b/mmrotate/deploy/rotated_rtmdet_head.py new file mode 100644 index 000000000..d46f35b88 --- /dev/null +++ b/mmrotate/deploy/rotated_rtmdet_head.py @@ -0,0 +1,119 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Optional + +import torch +from mmdeploy.codebase.mmdet import get_post_processing_params +from mmdeploy.codebase.mmrotate.core.post_processing.bbox_nms import \ + _multiclass_nms_rotated +from mmdeploy.core import FUNCTION_REWRITER +from mmengine.config import ConfigDict +from mmengine.structures import InstanceData +from torch import Tensor + +from mmrotate.structures import norm_angle + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmrotate.models.dense_heads.rotated_rtmdet_head.' 
+ 'RotatedRTMDetHead.predict_by_feat') +def rtmdet_head__predict_by_feat(ctx, + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + angle_preds: List[Tensor], + batch_img_metas: Optional[List[dict]] = None, + cfg: Optional[ConfigDict] = None, + rescale: bool = False, + with_nms: bool = True) -> List[InstanceData]: + """Rewrite `predict_by_feat` of `RTMDet` for default backend. + + Rewrite this function to deploy model, transform network output for a + batch into bbox predictions. + + Args: + ctx: Context that contains original meta information. + cls_scores (list[Tensor]): Classification scores for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * num_classes, H, W). + bbox_preds (list[Tensor]): Box energies / deltas for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * 4, H, W). + batch_img_metas (list[dict], Optional): Batch image meta info. + Defaults to None. + cfg (ConfigDict, optional): Test / postprocessing + configuration, if None, test_cfg would be used. + Defaults to None. + rescale (bool): If True, return boxes in original image space. + Defaults to False. + with_nms (bool): If True, do nms before return boxes. + Defaults to True. + + Returns: + tuple[Tensor, Tensor]: The first item is an (N, num_box, 5) tensor, + where 5 represent (tl_x, tl_y, br_x, br_y, score), N is batch + size and the score between 0 and 1. The shape of the second + tensor in the tuple is (N, num_box), and each element + represents the class label of the corresponding box. + """ + assert len(cls_scores) == len(bbox_preds) + device = cls_scores[0].device + cfg = self.test_cfg if cfg is None else cfg + batch_size = bbox_preds[0].shape[0] + featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] + mlvl_priors = self.prior_generator.grid_priors( + featmap_sizes, device=device) + + flatten_cls_scores = [ + cls_score.permute(0, 2, 3, 1).reshape(batch_size, -1, + self.cls_out_channels) + for cls_score in cls_scores + ] + flatten_bbox_preds = [ + bbox_pred.permute(0, 2, 3, 1).reshape(batch_size, -1, 4) + for bbox_pred in bbox_preds + ] + flatten_angle_preds = [ + angle_pred.permute(0, 2, 3, 1).reshape(batch_size, -1, + self.angle_coder.encode_size) + for angle_pred in angle_preds + ] + flatten_cls_scores = torch.cat(flatten_cls_scores, dim=1).sigmoid() + flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1) + flatten_angle_preds = torch.cat(flatten_angle_preds, dim=1) + priors = torch.cat(mlvl_priors) + + angle = self.angle_coder.decode(flatten_angle_preds, keepdim=True) + distance = flatten_bbox_preds + cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) + + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], + dim=-1) + rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) + + wh = distance[..., :2] + distance[..., 2:] + offset_t = (distance[..., 2:] - distance[..., :2]) / 2 + offset_t = offset_t.unsqueeze(-1) + offset = torch.matmul(rot_matrix, offset_t).squeeze(-1) + ctr = priors[..., :2] + offset + + angle_regular = norm_angle(angle, self.angle_version) + bboxes = torch.cat([ctr, wh, angle_regular], dim=-1) + + # directly multiply score factor and feed to nms + max_scores, _ = torch.max(flatten_cls_scores, 1) + mask = max_scores >= cfg.score_thr + scores = flatten_cls_scores.where(mask, flatten_cls_scores.new_zeros(1)) + if not with_nms: + return bboxes, scores + + deploy_cfg = ctx.cfg + post_params = get_post_processing_params(deploy_cfg) + max_output_boxes_per_class = 
post_params.max_output_boxes_per_class + iou_threshold = cfg.nms.get('iou_threshold', post_params.iou_threshold) + score_threshold = cfg.get('score_thr', post_params.score_threshold) + pre_top_k = post_params.pre_top_k + keep_top_k = cfg.get('max_per_img', post_params.keep_top_k) + + return _multiclass_nms_rotated(bboxes, scores, max_output_boxes_per_class, + iou_threshold, score_threshold, pre_top_k, + keep_top_k) From 4ccf0366b46f389277f9c374bf3938732cdea1e0 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Sat, 26 Nov 2022 22:46:03 +0800 Subject: [PATCH 25/52] fix hrsc config --- configs/rotated_rtmdet/_base_/dota_rr.py | 1 + configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py | 2 -- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/configs/rotated_rtmdet/_base_/dota_rr.py b/configs/rotated_rtmdet/_base_/dota_rr.py index 96bd7ac6d..ce21fee63 100644 --- a/configs/rotated_rtmdet/_base_/dota_rr.py +++ b/configs/rotated_rtmdet/_base_/dota_rr.py @@ -104,6 +104,7 @@ outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') custom_hooks = [ + dict(type='mmdet.NumClassCheckHook'), dict( type='EMAHook', ema_type='mmdet.ExpMomentumEMA', diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py index 01faa26f1..f28d27617 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py @@ -32,7 +32,7 @@ bbox_head=dict( type='RotatedRTMDetSepBNHead', # type='RTMDetSepBNHead', - num_classes=15, + num_classes=1, in_channels=256, stacked_convs=2, feat_channels=256, diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py index 8e9604a23..0b55a5b5d 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py @@ -1,7 +1,7 @@ _base_ = './rotated_rtmdet_l-3x-hrsc.py' -# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa + model = dict( backbone=dict( deepen_factor=0.33, diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py index 5238867a8..b1b7ed172 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py @@ -1,10 +1,8 @@ _base_ = './rotated_rtmdet_l-3x-hrsc.py' -# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa model = dict( - # init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), backbone=dict( deepen_factor=0.167, widen_factor=0.375, From 058d442f0cb565fb33a87cb5f82ff0eed5c9024c Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Sat, 26 Nov 2022 22:52:31 +0800 Subject: [PATCH 26/52] remove deploy --- mmrotate/deploy/rotated_rtmdet_head.py | 119 ------------------------- 1 file changed, 119 deletions(-) delete mode 100644 mmrotate/deploy/rotated_rtmdet_head.py diff --git a/mmrotate/deploy/rotated_rtmdet_head.py b/mmrotate/deploy/rotated_rtmdet_head.py 
deleted file mode 100644 index d46f35b88..000000000 --- a/mmrotate/deploy/rotated_rtmdet_head.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import List, Optional - -import torch -from mmdeploy.codebase.mmdet import get_post_processing_params -from mmdeploy.codebase.mmrotate.core.post_processing.bbox_nms import \ - _multiclass_nms_rotated -from mmdeploy.core import FUNCTION_REWRITER -from mmengine.config import ConfigDict -from mmengine.structures import InstanceData -from torch import Tensor - -from mmrotate.structures import norm_angle - - -@FUNCTION_REWRITER.register_rewriter( - func_name='mmrotate.models.dense_heads.rotated_rtmdet_head.' - 'RotatedRTMDetHead.predict_by_feat') -def rtmdet_head__predict_by_feat(ctx, - self, - cls_scores: List[Tensor], - bbox_preds: List[Tensor], - angle_preds: List[Tensor], - batch_img_metas: Optional[List[dict]] = None, - cfg: Optional[ConfigDict] = None, - rescale: bool = False, - with_nms: bool = True) -> List[InstanceData]: - """Rewrite `predict_by_feat` of `RTMDet` for default backend. - - Rewrite this function to deploy model, transform network output for a - batch into bbox predictions. - - Args: - ctx: Context that contains original meta information. - cls_scores (list[Tensor]): Classification scores for all - scale levels, each is a 4D-tensor, has shape - (batch_size, num_priors * num_classes, H, W). - bbox_preds (list[Tensor]): Box energies / deltas for all - scale levels, each is a 4D-tensor, has shape - (batch_size, num_priors * 4, H, W). - batch_img_metas (list[dict], Optional): Batch image meta info. - Defaults to None. - cfg (ConfigDict, optional): Test / postprocessing - configuration, if None, test_cfg would be used. - Defaults to None. - rescale (bool): If True, return boxes in original image space. - Defaults to False. - with_nms (bool): If True, do nms before return boxes. - Defaults to True. - - Returns: - tuple[Tensor, Tensor]: The first item is an (N, num_box, 5) tensor, - where 5 represent (tl_x, tl_y, br_x, br_y, score), N is batch - size and the score between 0 and 1. The shape of the second - tensor in the tuple is (N, num_box), and each element - represents the class label of the corresponding box. 
- """ - assert len(cls_scores) == len(bbox_preds) - device = cls_scores[0].device - cfg = self.test_cfg if cfg is None else cfg - batch_size = bbox_preds[0].shape[0] - featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] - mlvl_priors = self.prior_generator.grid_priors( - featmap_sizes, device=device) - - flatten_cls_scores = [ - cls_score.permute(0, 2, 3, 1).reshape(batch_size, -1, - self.cls_out_channels) - for cls_score in cls_scores - ] - flatten_bbox_preds = [ - bbox_pred.permute(0, 2, 3, 1).reshape(batch_size, -1, 4) - for bbox_pred in bbox_preds - ] - flatten_angle_preds = [ - angle_pred.permute(0, 2, 3, 1).reshape(batch_size, -1, - self.angle_coder.encode_size) - for angle_pred in angle_preds - ] - flatten_cls_scores = torch.cat(flatten_cls_scores, dim=1).sigmoid() - flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1) - flatten_angle_preds = torch.cat(flatten_angle_preds, dim=1) - priors = torch.cat(mlvl_priors) - - angle = self.angle_coder.decode(flatten_angle_preds, keepdim=True) - distance = flatten_bbox_preds - cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], - dim=-1) - rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) - - wh = distance[..., :2] + distance[..., 2:] - offset_t = (distance[..., 2:] - distance[..., :2]) / 2 - offset_t = offset_t.unsqueeze(-1) - offset = torch.matmul(rot_matrix, offset_t).squeeze(-1) - ctr = priors[..., :2] + offset - - angle_regular = norm_angle(angle, self.angle_version) - bboxes = torch.cat([ctr, wh, angle_regular], dim=-1) - - # directly multiply score factor and feed to nms - max_scores, _ = torch.max(flatten_cls_scores, 1) - mask = max_scores >= cfg.score_thr - scores = flatten_cls_scores.where(mask, flatten_cls_scores.new_zeros(1)) - if not with_nms: - return bboxes, scores - - deploy_cfg = ctx.cfg - post_params = get_post_processing_params(deploy_cfg) - max_output_boxes_per_class = post_params.max_output_boxes_per_class - iou_threshold = cfg.nms.get('iou_threshold', post_params.iou_threshold) - score_threshold = cfg.get('score_thr', post_params.score_threshold) - pre_top_k = post_params.pre_top_k - keep_top_k = cfg.get('max_per_img', post_params.keep_top_k) - - return _multiclass_nms_rotated(bboxes, scores, max_output_boxes_per_class, - iou_threshold, score_threshold, pre_top_k, - keep_top_k) From 99cf674bddf8b0d95feb07e34eb86c5774cc79c7 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 13:36:51 +0800 Subject: [PATCH 27/52] move distance2obb to transforms, remove useless code --- .../models/dense_heads/rotated_rtmdet_head.py | 70 +------- mmrotate/models/losses/__init__.py | 5 +- mmrotate/models/losses/prob_iou_loss.py | 136 -------------- mmrotate/models/losses/rd_iou_loss.py | 166 ------------------ mmrotate/structures/bbox/__init__.py | 4 +- mmrotate/structures/bbox/transforms.py | 32 ++++ 6 files changed, 36 insertions(+), 377 deletions(-) delete mode 100644 mmrotate/models/losses/prob_iou_loss.py delete mode 100644 mmrotate/models/losses/rd_iou_loss.py diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py b/mmrotate/models/dense_heads/rotated_rtmdet_head.py index f89cfb920..e63473141 100644 --- a/mmrotate/models/dense_heads/rotated_rtmdet_head.py +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -18,29 +18,8 @@ from mmengine.structures import InstanceData from torch import Tensor, nn -from mmrotate.models.task_modules.coders import DistanceAnglePointCoder from mmrotate.registry import 
MODELS, TASK_UTILS -from mmrotate.structures import RotatedBoxes, norm_angle - - -# TODO move to mmrotate.structures.transform and update Coder -def distance2obb(points, distance, angle_version='oc'): - distance, angle = distance.split([4, 1], dim=-1) - - cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) - - rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], - dim=-1) - rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) - - wh = distance[..., :2] + distance[..., 2:] - offset_t = (distance[..., 2:] - distance[..., :2]) / 2 - offset_t = offset_t.unsqueeze(-1) - offset = torch.matmul(rot_matrix, offset_t).squeeze(-1) - ctr = points[..., :2] + offset - - angle_regular = norm_angle(angle, angle_version) - return torch.cat([ctr, wh, angle_regular], dim=-1) +from mmrotate.structures import RotatedBoxes, distance2obb @MODELS.register_module() @@ -869,50 +848,3 @@ def forward(self, feats: Tuple[Tensor, ...]) -> tuple: bbox_preds.append(reg_dist) angle_preds.append(angle_pred) return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) - - -if __name__ == '__main__': - points = torch.tensor([[0., 0., 8., 8.], [8., 0., 8., 8.], - [16., 0., 8., 8.], [24., 0., 8., 8.]], - device='cuda:0') - - distances = torch.tensor([[[7.4215, 7.8629, 7.4568, 8.1447, -0.0224], - [7.3209, 7.7807, 7.4076, 8.1743, -0.0194], - [7.2929, 7.7480, 7.3624, 8.1829, -0.0228], - [7.3291, 7.7770, 7.3966, 8.2230, -0.0215]], - [[7.5713, 7.8529, 7.5189, 8.1220, -0.0212], - [7.6215, 7.8978, 7.5875, 8.2110, -0.0304], - [7.6464, 7.8849, 7.4770, 8.0470, -0.0372], - [7.5255, 7.8110, 7.4478, 8.1207, -0.0282]], - [[7.4363, 7.8605, 7.4451, 8.1569, -0.0239], - [7.3455, 7.7839, 7.4027, 8.1784, -0.0184], - [7.3022, 7.7584, 7.3663, 8.1751, -0.0232], - [7.3419, 7.7717, 7.3903, 8.2077, -0.0242]], - [[7.4416, 7.8393, 7.4837, 8.1546, -0.0213], - [7.3217, 7.7790, 7.3956, 8.1805, -0.0154], - [7.3080, 7.7377, 7.3658, 8.1779, -0.0205], - [7.3390, 7.7580, 7.4031, 8.2245, -0.0205]], - [[7.7741, 7.8088, 7.7546, 8.3303, -0.0151], - [7.7897, 7.9653, 7.9556, 8.3727, -0.0266], - [8.2531, 8.2622, 8.4759, 8.1064, -0.0506], - [8.1185, 7.9733, 8.1983, 8.3481, -0.0243]], - [[7.4850, 7.8193, 7.4702, 8.1592, -0.0248], - [7.3636, 7.7511, 7.4078, 8.1570, -0.0204], - [7.3184, 7.7122, 7.3942, 8.1611, -0.0243], - [7.3617, 7.7788, 7.4001, 8.2358, -0.0256]], - [[7.4395, 7.8696, 7.4530, 8.1517, -0.0246], - [7.3550, 7.7790, 7.4124, 8.1524, -0.0221], - [7.3377, 7.7614, 7.3752, 8.1712, -0.0226], - [7.3411, 7.7603, 7.4000, 8.2265, -0.0246]], - [[7.4344, 7.8780, 7.4388, 8.1534, -0.0247], - [7.3309, 7.8050, 7.3925, 8.1647, -0.0202], - [7.2904, 7.7461, 7.3651, 8.1827, -0.0221], - [7.3486, 7.7741, 7.4090, 8.2144, -0.0230]]], - device='cuda:0') - - res = distance2obb(points, distances, 'le90').reshape(-1, 5) - - c = DistanceAnglePointCoder(angle_version='le90') - res2 = c.decode(points.repeat(8, 1)[:, :2], distances.reshape(-1, 5)) - - print((res == res2).unique()) diff --git a/mmrotate/models/losses/__init__.py b/mmrotate/models/losses/__init__.py index a49cdd0ff..216543420 100644 --- a/mmrotate/models/losses/__init__.py +++ b/mmrotate/models/losses/__init__.py @@ -3,14 +3,11 @@ from .gaussian_dist_loss import GDLoss from .gaussian_dist_loss_v1 import GDLoss_v1 from .kf_iou_loss import KFLoss -from .prob_iou_loss import ProbIoULoss -from .rd_iou_loss import RDIoULoss from .rotated_iou_loss import RotatedIoULoss from .smooth_focal_loss import SmoothFocalLoss from .spatial_border_loss import SpatialBorderLoss __all__ = [ 'GDLoss', 'GDLoss_v1', 
'KFLoss', 'ConvexGIoULoss', 'BCConvexGIoULoss', - 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss', 'RDIoULoss', - 'ProbIoULoss' + 'SmoothFocalLoss', 'RotatedIoULoss', 'SpatialBorderLoss' ] diff --git a/mmrotate/models/losses/prob_iou_loss.py b/mmrotate/models/losses/prob_iou_loss.py deleted file mode 100644 index d8acc0eef..000000000 --- a/mmrotate/models/losses/prob_iou_loss.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch -from mmdet.models import weighted_loss -from torch import nn - -from mmrotate.registry import MODELS - - -def gbb_form(boxes): - return torch.cat( - (boxes[:, :2], torch.pow(boxes[:, 2:4], 2) / 12, boxes[:, 4:]), 1) - - -def rotated_form(a_, b_, angles): - a = a_ * torch.pow(torch.cos(angles), 2.) + b_ * torch.pow( - torch.sin(angles), 2.) - b = a_ * torch.pow(torch.sin(angles), 2.) + b_ * torch.pow( - torch.cos(angles), 2.) - c = a_ * torch.cos(angles) * torch.sin(angles) - b_ * torch.sin( - angles) * torch.cos(angles) - return a, b, c - - -@weighted_loss -def probiou_loss(pred, target, eps=1e-3, mode='l1'): - """pred -> a matrix [N,5](x,y,w,h,angle) containing ours predicted box - target -> a matrix [N,5](x,y,w,h,angle) containing ours target box eps. - - -> threshold to avoid infinite values mode -> ('l1' in [0,1] or 'l2' in - [0,inf]) metrics according our paper. - """ - - gbboxes1 = gbb_form(pred) - gbboxes2 = gbb_form(target) - - (x1, y1, a1_, b1_, c1_) = (gbboxes1[:, 0], gbboxes1[:, 1], gbboxes1[:, 2], - gbboxes1[:, 3], gbboxes1[:, 4]) - (x2, y2, a2_, b2_, c2_) = (gbboxes2[:, 0], gbboxes2[:, 1], gbboxes2[:, 2], - gbboxes2[:, 3], gbboxes2[:, 4]) - - a1, b1, c1 = rotated_form(a1_, b1_, c1_) - a2, b2, c2 = rotated_form(a2_, b2_, c2_) - - t1 = (((a1 + a2) * (torch.pow(y1 - y2, 2)) + (b1 + b2) * - (torch.pow(x1 - x2, 2))) / ((a1 + a2) * (b1 + b2) - - (torch.pow(c1 + c2, 2)) + eps)) * 0.25 - t2 = (((c1 + c2) * (x2 - x1) * (y1 - y2)) / - ((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2)) + eps)) * 0.5 - t3 = torch.log(((a1 + a2) * (b1 + b2) - (torch.pow(c1 + c2, 2))) / - (4 * torch.sqrt((a1 * b1 - torch.pow(c1, 2)) * - (a2 * b2 - torch.pow(c2, 2))) + eps) + - eps) * 0.5 - - B_d = t1 + t2 + t3 - - B_d = torch.clamp(B_d, eps, 100.0) - l1 = torch.sqrt(1.0 - torch.exp(-B_d) + eps) - l_i = torch.pow(l1, 2.0) - l2 = -torch.log(1.0 - l_i + eps) - - if mode == 'l1': - probiou = l1 - if mode == 'l2': - probiou = l2 - - return probiou - - -@MODELS.register_module() -class ProbIoULoss(nn.Module): - """RotatedIoULoss. - - Computing the IoU loss between a set of predicted rbboxes and - target rbboxes. - Args: - linear (bool): If True, use linear scale of loss else determined - by mode. Default: False. - eps (float): Eps to avoid log(0). - reduction (str): Options are "none", "mean" and "sum". - loss_weight (float): Weight of loss. - mode (str): Loss scaling mode, including "linear", "square", and "log". - Default: 'log' - """ - - def __init__(self, mode='l1', eps=1e-6, reduction='mean', loss_weight=1.0): - super(ProbIoULoss, self).__init__() - - self.mode = mode - self.eps = eps - self.reduction = reduction - self.loss_weight = loss_weight - - def forward(self, - pred, - target, - weight=None, - avg_factor=None, - reduction_override=None, - **kwargs): - """Forward function. - - Args: - pred (torch.Tensor): The prediction. - target (torch.Tensor): The learning target of the prediction. - weight (torch.Tensor, optional): The weight of loss for each - prediction. Defaults to None. 
- avg_factor (int, optional): Average factor that is used to average - the loss. Defaults to None. - reduction_override (str, optional): The reduction method used to - override the original reduction method of the loss. - Defaults to None. Options are "none", "mean" and "sum". - """ - assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = ( - reduction_override if reduction_override else self.reduction) - if (weight is not None) and (not torch.any(weight > 0)) and ( - reduction != 'none'): - if pred.dim() == weight.dim() + 1: - weight = weight.unsqueeze(1) - return (pred * weight).sum() # 0 - if weight is not None and weight.dim() > 1: - # TODO: remove this in the future - # reduce the weight of shape (n, 5) to (n,) to match the - # iou_loss of shape (n,) - assert weight.shape == pred.shape - weight = weight.mean(-1) - loss = self.loss_weight * probiou_loss( - pred, - target, - weight, - mode=self.mode, - eps=self.eps, - reduction=reduction, - avg_factor=avg_factor, - **kwargs) - return loss diff --git a/mmrotate/models/losses/rd_iou_loss.py b/mmrotate/models/losses/rd_iou_loss.py deleted file mode 100644 index 6d971aa00..000000000 --- a/mmrotate/models/losses/rd_iou_loss.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import warnings - -import numpy as np -import torch -import torch.nn as nn -from mmdet.models.losses.utils import weighted_loss - -from mmrotate.registry import MODELS -from mmrotate.structures import norm_angle - - -@weighted_loss -def rd_iou_loss(pred, target, linear=False, mode='log', eps=1e-6): - """Rotated IoU loss. - - Computing the IoU loss between a set of predicted rbboxes and target - rbboxes. - The loss is calculated as negative log of IoU. - - Args: - pred (torch.Tensor): Predicted bboxes of format (x, y, h, w, angle), - shape (n, 5). - target (torch.Tensor): Corresponding gt bboxes, shape (n, 5). - linear (bool, optional): If True, use linear scale of loss instead of - log scale. Default: False. - mode (str): Loss scaling mode, including "linear", "square", and "log". - Default: 'log' - eps (float): Eps to avoid log(0). - Return: - torch.Tensor: Loss tensor. 
- """ - assert mode in ['linear', 'square', 'log'] - if linear: - mode = 'linear' - warnings.warn( - 'DeprecationWarning: Setting "linear=True" in ' - 'poly_iou_loss is deprecated, please use "mode=`linear`" ' - 'instead.') - - pred_x, pred_y, pred_w, pred_h, pred_t = pred.split([1, 1, 1, 1, 1], - dim=-1) - target_x, target_y, target_w, target_h, target_t = target.split( - [1, 1, 1, 1, 1], dim=-1) - - target_z = torch.zeros_like(target_t) - target_l = torch.ones_like(target_t) * 0.5 * np.pi - - pred_z = torch.ones_like(pred_t) * norm_angle(pred_t - target_t, 'le90') - pred_l = torch.ones_like(pred_t) * 0.5 * np.pi - - area_pred = pred_w * pred_h * pred_l - area_target = target_w * target_h * target_l - - union = ( - f(pred_x, target_x, pred_w, target_w) * - f(pred_y, target_y, pred_h, target_h) * - f(pred_z, target_z, pred_l, target_l)) - - ious = union / (area_pred + area_target - union) - - enclose_area = ( - f2(pred_x, target_x, pred_w, target_w) * - f2(pred_y, target_y, pred_h, target_h) * - f2(pred_z, target_z, pred_l, target_l)) - - gious = ious - (enclose_area - union) / enclose_area - - # ious = ious.squeeze(0).clamp(min=eps) - - loss = 1 - gious.squeeze(-1) - - return loss - - -def f(x1, x2, w1, w2): - ff = torch.min(x1 + 0.5 * w1, x2 + 0.5 * w2) - torch.max( - x1 - 0.5 * w1, x2 - 0.5 * w2) - return ff.clamp(min=0) - - -def f2(x1, x2, w1, w2): - ff = torch.max(x1 + 0.5 * w1, x2 + 0.5 * w2) - torch.min( - x1 - 0.5 * w1, x2 - 0.5 * w2) - return ff.clamp(min=0) - - -@MODELS.register_module() -class RDIoULoss(nn.Module): - """RotatedIoULoss. - - Computing the IoU loss between a set of predicted rbboxes and - target rbboxes. - Args: - linear (bool): If True, use linear scale of loss else determined - by mode. Default: False. - eps (float): Eps to avoid log(0). - reduction (str): Options are "none", "mean" and "sum". - loss_weight (float): Weight of loss. - mode (str): Loss scaling mode, including "linear", "square", and "log". - Default: 'log' - """ - - def __init__(self, - linear=False, - eps=1e-6, - reduction='mean', - loss_weight=1.0, - mode='log'): - super(RDIoULoss, self).__init__() - assert mode in ['linear', 'square', 'log'] - if linear: - mode = 'linear' - warnings.warn('DeprecationWarning: Setting "linear=True" in ' - 'IOULoss is deprecated, please use "mode=`linear`" ' - 'instead.') - self.mode = mode - self.linear = linear - self.eps = eps - self.reduction = reduction - self.loss_weight = loss_weight - - def forward(self, - pred, - target, - weight=None, - avg_factor=None, - reduction_override=None, - **kwargs): - """Forward function. - - Args: - pred (torch.Tensor): The prediction. - target (torch.Tensor): The learning target of the prediction. - weight (torch.Tensor, optional): The weight of loss for each - prediction. Defaults to None. - avg_factor (int, optional): Average factor that is used to average - the loss. Defaults to None. - reduction_override (str, optional): The reduction method used to - override the original reduction method of the loss. - Defaults to None. Options are "none", "mean" and "sum". 
- """ - assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = ( - reduction_override if reduction_override else self.reduction) - if (weight is not None) and (not torch.any(weight > 0)) and ( - reduction != 'none'): - if pred.dim() == weight.dim() + 1: - weight = weight.unsqueeze(1) - return (pred * weight).sum() # 0 - if weight is not None and weight.dim() > 1: - # TODO: remove this in the future - # reduce the weight of shape (n, 5) to (n,) to match the - # iou_loss of shape (n,) - assert weight.shape == pred.shape - weight = weight.mean(-1) - loss = self.loss_weight * rd_iou_loss( - pred, - target, - weight, - mode=self.mode, - eps=self.eps, - reduction=reduction, - avg_factor=avg_factor, - **kwargs) - return loss diff --git a/mmrotate/structures/bbox/__init__.py b/mmrotate/structures/bbox/__init__.py index 98e3a809b..895ade012 100644 --- a/mmrotate/structures/bbox/__init__.py +++ b/mmrotate/structures/bbox/__init__.py @@ -4,10 +4,10 @@ rbox2hbox, rbox2qbox) from .quadri_boxes import QuadriBoxes from .rotated_boxes import RotatedBoxes -from .transforms import gaussian2bbox, gt2gaussian, norm_angle +from .transforms import distance2obb, gaussian2bbox, gt2gaussian, norm_angle __all__ = [ 'QuadriBoxes', 'RotatedBoxes', 'hbox2rbox', 'hbox2qbox', 'rbox2hbox', 'rbox2qbox', 'qbox2hbox', 'qbox2rbox', 'gaussian2bbox', 'gt2gaussian', - 'norm_angle', 'rbbox_overlaps', 'fake_rbbox_overlaps' + 'norm_angle', 'rbbox_overlaps', 'fake_rbbox_overlaps', 'distance2obb' ] diff --git a/mmrotate/structures/bbox/transforms.py b/mmrotate/structures/bbox/transforms.py index 6d0d72a12..83c74a620 100644 --- a/mmrotate/structures/bbox/transforms.py +++ b/mmrotate/structures/bbox/transforms.py @@ -78,3 +78,35 @@ def gt2gaussian(target): R = torch.stack([cos_sin * neg, cos_sin[..., [1, 0]]], dim=-2) return (center, R.matmul(diag).matmul(R.transpose(-1, -2))) + + +def distance2obb(points: torch.Tensor, + distance: torch.Tensor, + angle_version: str = 'oc'): + """Convert distance angle to rotated boxes. + + Args: + points (Tensor): Shape (B, N, 2) or (N, 2). + distance (Tensor): Distance from the given point to 4 + boundaries and angle (left, top, right, bottom, angle). + Shape (B, N, 5) or (N, 5) + angle_version: angle representations. + Returns: + dict[str, torch.Tensor]: Gaussian distributions. 
+ """ + distance, angle = distance.split([4, 1], dim=-1) + + cos_angle, sin_angle = torch.cos(angle), torch.sin(angle) + + rot_matrix = torch.cat([cos_angle, -sin_angle, sin_angle, cos_angle], + dim=-1) + rot_matrix = rot_matrix.reshape(*rot_matrix.shape[:-1], 2, 2) + + wh = distance[..., :2] + distance[..., 2:] + offset_t = (distance[..., 2:] - distance[..., :2]) / 2 + offset_t = offset_t.unsqueeze(-1) + offset = torch.matmul(rot_matrix, offset_t).squeeze(-1) + ctr = points[..., :2] + offset + + angle_regular = norm_angle(angle, angle_version) + return torch.cat([ctr, wh, angle_regular], dim=-1) From f2860b400d93b99204c1b277ad7efe74240f142e Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 13:56:25 +0800 Subject: [PATCH 28/52] refactor configs --- configs/rotated_rtmdet/README.md | 39 ++++ .../rotated_rtmdet/_base_/default_runtime.py | 12 +- .../_base_/dota_mixup_mosaic.py | 158 --------------- configs/rotated_rtmdet/_base_/dota_rr.py | 60 +++--- configs/rotated_rtmdet/_base_/dota_rr_ms.py | 103 ++++++++++ .../_base_/hrsc_mosaic_mixup.py | 132 ------------- configs/rotated_rtmdet/_base_/hrsc_rr.py | 22 +-- .../rotated_rtmdet/_base_/schedule_100e.py | 34 ---- .../rotated_rtmdet_l-100e-aug-dota.py | 185 ++++++++++++++++++ .../rotated_rtmdet_l-300e-aug-hrsc.py | 183 +++++++++++++++++ .../rotated_rtmdet_l-3x-dota.py | 14 +- ...hrsc.py => rotated_rtmdet_l-3x-dota_ms.py} | 18 +- .../rotated_rtmdet_l-9x-hrsc.py | 105 ++++++++++ ...tated_rtmdet_l-coco_pretrain-3x-dota_ms.py | 17 ++ .../rotated_rtmdet_m-3x-dota.py | 18 ++ .../rotated_rtmdet_m-3x-dota_ms.py | 18 ++ .../rotated_rtmdet_m-3x-hrsc.py | 7 - ...3x-hrsc.py => rotated_rtmdet_s-3x-dota.py} | 12 +- .../rotated_rtmdet_s-3x-dota_ms.py | 20 ++ .../rotated_rtmdet_tiny-300e-aug-hrsc.py | 92 +++++++++ .../rotated_rtmdet_tiny-3x-dota.py | 12 +- ...c.py => rotated_rtmdet_tiny-3x-dota_ms.py} | 9 +- ...hrsc.py => rotated_rtmdet_tiny-9x-hrsc.py} | 2 +- .../rotated_rtmdet_x-3x-hrsc.py | 8 - 24 files changed, 865 insertions(+), 415 deletions(-) create mode 100644 configs/rotated_rtmdet/README.md delete mode 100644 configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py create mode 100644 configs/rotated_rtmdet/_base_/dota_rr_ms.py delete mode 100644 configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py delete mode 100644 configs/rotated_rtmdet/_base_/schedule_100e.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py rename configs/rotated_rtmdet/{rotated_rtmdet_l-3x-hrsc.py => rotated_rtmdet_l-3x-dota_ms.py} (83%) create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py rename configs/rotated_rtmdet/{rotated_rtmdet_s-3x-hrsc.py => rotated_rtmdet_s-3x-dota.py} (54%) create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms.py create mode 100644 configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-aug-hrsc.py rename configs/rotated_rtmdet/{rotated_rtmdet_tiny_probiou-3x-hrsc.py => rotated_rtmdet_tiny-3x-dota_ms.py} (59%) rename configs/rotated_rtmdet/{rotated_rtmdet_tiny-3x-hrsc.py => rotated_rtmdet_tiny-9x-hrsc.py} (93%) delete mode 100644 configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py diff --git a/configs/rotated_rtmdet/README.md 
b/configs/rotated_rtmdet/README.md new file mode 100644 index 000000000..8e2bd6019 --- /dev/null +++ b/configs/rotated_rtmdet/README.md @@ -0,0 +1,39 @@ +# RTMDet-R + + + +## Abstract + +Our tech-report will be released soon. + +
+ +## Results and Models + +### DOTA-v1.0 + +| Backbone | size | pretrain | schedule | Aug | mAP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :--: | :------: | :------: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :-----------------------: | +| RTMDet-tiny | 1024 | IMP | 3x | RR | 75.60 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](<>) \\ [log](<>) | +| RTMDet-tiny | 1024 | IMP | 3x | MS+RR | 79.82 | 4.88 | 20.45 | 4.46 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](<>) \\ [log](<>) | +| RTMDet-s | 1024 | IMP | 3x | RR | 76.93 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](<>) \\ [log](<>) | +| RTMDet-s | 1024 | IMP | 3x | MS+RR | 79.98 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](<>) \\ [log](<>) | +| RTMDet-m | 1024 | IMP | 3x | RR | 78.24 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](<>) \\ [log](<>) | +| RTMDet-m | 1024 | IMP | 3x | MS+RR | 80.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](<>) \\ [log](<>) | +| RTMDet-l | 1024 | IMP | 3x | RR | 78.85 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](<>) \\ [log](<>) | +| RTMDet-l | 1024 | IMP | 3x | MS+RR | 80.54 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](<>) \\ [log](<>) | +| RTMDet-l | 1024 | COP | 3x | MS+RR | 81.33 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](<>) \\ [log](<>) | + +### HRSC + +| Backbone | size | pretrain | schedule | Aug | mAP07 | mAP12 | Params(M) | FLOPS(G) | Config | Download | +| :---------: | :--: | :------: | :------: | :-: | :---: | :---: | :-------: | :------: | :----------------------------------------: | :-----------------------: | +| RTMDet-tiny | 800 | IMP | 9x | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](<>) \\ [log](<>) | + +**Note**: + +1. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and + without NMS. +2. We also provide config with mixup and mosaic for longer schedule. 
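All of the RTMDet-R configs listed above regress a per-prior `(left, top, right, bottom, angle)` tuple and decode it into an oriented `(cx, cy, w, h, angle)` box; [PATCH 27/52] packages that rotation-matrix decoding as `distance2obb` in `mmrotate.structures`. Below is a minimal sketch of the decoding, cross-checked against `DistanceAnglePointCoder` in the same way as the ad-hoc check that was removed from `rotated_rtmdet_head.py`. It assumes mmrotate 1.x with these patches installed; the sample numbers are illustrative only.

```python
import torch

from mmrotate.models.task_modules.coders import DistanceAnglePointCoder
from mmrotate.structures import distance2obb

# Prior centers (x, y) and predicted (left, top, right, bottom, angle)
# offsets, using the 'le90' angle definition from the configs above.
points = torch.tensor([[0., 0.], [8., 0.], [16., 0.]])
distances = torch.tensor([[7.42, 7.86, 7.46, 8.14, -0.022],
                          [7.32, 7.78, 7.41, 8.17, -0.019],
                          [7.29, 7.75, 7.36, 8.18, -0.023]])

# Decode with the standalone helper added to mmrotate.structures.
boxes = distance2obb(points, distances, angle_version='le90')

# The existing coder should produce the same (cx, cy, w, h, angle) boxes.
coder = DistanceAnglePointCoder(angle_version='le90')
boxes_ref = coder.decode(points, distances)

print(torch.allclose(boxes, boxes_ref))  # expected: True
print(boxes)  # one (cx, cy, w, h, angle) row per prior point
```

The same decoding also appeared verbatim in the mmdeploy `predict_by_feat` rewriter added and later removed earlier in this series.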
diff --git a/configs/rotated_rtmdet/_base_/default_runtime.py b/configs/rotated_rtmdet/_base_/default_runtime.py index 0a91907ef..6a53c9901 100644 --- a/configs/rotated_rtmdet/_base_/default_runtime.py +++ b/configs/rotated_rtmdet/_base_/default_runtime.py @@ -4,7 +4,7 @@ timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), - checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3), + checkpoint=dict(type='CheckpointHook', interval=12, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='mmdet.DetVisualizationHook')) @@ -22,3 +22,13 @@ log_level = 'INFO' load_from = None resume = False + +custom_hooks = [ + dict(type='mmdet.NumClassCheckHook'), + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49) +] diff --git a/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py b/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py deleted file mode 100644 index 998a45f57..000000000 --- a/configs/rotated_rtmdet/_base_/dota_mixup_mosaic.py +++ /dev/null @@ -1,158 +0,0 @@ -# dataset settings -dataset_type = 'DOTADataset' -# data_root = '/home/wangchen/liuyanyi/datasets/dota_mmrotate_ss/' -data_root = '/datasets/dota_mmrotate_ss/' -file_client_args = dict(backend='disk') - -train_pipeline = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.CachedMosaic', - img_scale=(1024, 1024), - pad_val=114.0, - max_cached_images=20, - random_pop=False), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(2048, 2048), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='RandomRotate', prob=0.5, angle_range=180), - dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict( - type='mmdet.RandomFlip', - prob=0.75, - direction=['horizontal', 'vertical', 'diagonal']), - dict( - type='mmdet.Pad', size=(1024, 1024), - pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(1024, 1024), - ratio_range=(1.0, 1.0), - max_cached_images=10, - random_pop=False, - pad_val=(114, 114, 114), - prob=0.5), - dict(type='mmdet.PackDetInputs') -] - -train_pipeline_stage2 = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(1024, 1024), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='RandomRotate', prob=0.5, angle_range=180), - dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict( - type='mmdet.RandomFlip', - prob=0.75, - direction=['horizontal', 'vertical', 'diagonal']), - dict( - type='mmdet.Pad', size=(1024, 1024), - pad_val=dict(img=(114, 114, 114))), - dict(type='mmdet.PackDetInputs') -] - -val_pipeline = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), - # avoid bboxes being resized - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.Pad', size=(1024, 1024), - 
pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -test_pipeline = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), - dict( - type='mmdet.Pad', size=(1024, 1024), - pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=8, - num_workers=8, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=None, - pin_memory=True, - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='trainval/annfiles/', - data_prefix=dict(img_path='trainval/images/'), - img_shape=(1024, 1024), - filter_cfg=dict(filter_empty_gt=True), - pipeline=train_pipeline)) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='val/annfiles/', - data_prefix=dict(img_path='val/images/'), - img_shape=(1024, 1024), - test_mode=True, - pipeline=val_pipeline)) -# test_dataloader = val_dataloader - -val_evaluator = dict(type='DOTAMetric', metric='mAP') -# test_evaluator = val_evaluator - -# inference on test dataset and format the output results -# for submission. Note: the test set has no annotation. -test_dataloader = dict( - batch_size=8, - num_workers=8, - persistent_workers=False, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - data_prefix=dict(img_path='test/images/'), - img_shape=(1024, 1024), - test_mode=True, - pipeline=test_pipeline)) -test_evaluator = dict( - type='DOTAMetric', - format_only=True, - merge_patches=True, - outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=30, - switch_pipeline=train_pipeline_stage2) -] diff --git a/configs/rotated_rtmdet/_base_/dota_rr.py b/configs/rotated_rtmdet/_base_/dota_rr.py index ce21fee63..dbc854e3b 100644 --- a/configs/rotated_rtmdet/_base_/dota_rr.py +++ b/configs/rotated_rtmdet/_base_/dota_rr.py @@ -1,7 +1,7 @@ # dataset settings dataset_type = 'DOTADataset' -# data_root = '/home/wangchen/liuyanyi/datasets/dota_mmrotate_ss/' -data_root = '/datasets/dota_mmrotate_ss/' +data_root = 'data/split_ss_dota/' + file_client_args = dict(backend='disk') train_pipeline = [ @@ -54,7 +54,7 @@ persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), batch_sampler=None, - pin_memory=True, + pin_memory=False, dataset=dict( type=dataset_type, data_root=data_root, @@ -72,43 +72,33 @@ dataset=dict( type=dataset_type, data_root=data_root, - ann_file='val/annfiles/', - data_prefix=dict(img_path='val/images/'), + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), img_shape=(1024, 1024), test_mode=True, pipeline=val_pipeline)) -# test_dataloader = val_dataloader +test_dataloader = val_dataloader val_evaluator = dict(type='DOTAMetric', metric='mAP') -# test_evaluator = val_evaluator +test_evaluator = val_evaluator # inference on test dataset and format the output results # for submission. 
Note: the test set has no annotation. -test_dataloader = dict( - batch_size=8, - num_workers=8, - persistent_workers=False, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - data_prefix=dict(img_path='test/images/'), - img_shape=(1024, 1024), - test_mode=True, - pipeline=test_pipeline)) -test_evaluator = dict( - type='DOTAMetric', - format_only=True, - merge_patches=True, - outfile_prefix='./work_dirs/rtm_iou_test_coco/Task1') - -custom_hooks = [ - dict(type='mmdet.NumClassCheckHook'), - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49) -] +# test_dataloader = dict( +# batch_size=8, +# num_workers=8, +# persistent_workers=False, +# drop_last=False, +# sampler=dict(type='DefaultSampler', shuffle=False), +# dataset=dict( +# type=dataset_type, +# data_root=data_root, +# data_prefix=dict(img_path='test/images/'), +# img_shape=(1024, 1024), +# test_mode=True, +# pipeline=test_pipeline)) +# test_evaluator = dict( +# type='DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix='./work_dirs/rtmdet_r/Task1') diff --git a/configs/rotated_rtmdet/_base_/dota_rr_ms.py b/configs/rotated_rtmdet/_base_/dota_rr_ms.py new file mode 100644 index 000000000..c75bb2c8f --- /dev/null +++ b/configs/rotated_rtmdet/_base_/dota_rr_ms.py @@ -0,0 +1,103 @@ +# dataset settings +dataset_type = 'DOTADataset' +data_root = 'data/split_ms_dota/' +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='RandomRotate', + prob=0.5, + angle_range=180, + rect_obj_labels=[9, 11]), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] +val_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + # avoid bboxes being resized + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +test_pipeline = [ + dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=8, + num_workers=8, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=None, + pin_memory=False, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), + img_shape=(1024, 1024), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + 
persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), + img_shape=(1024, 1024), + test_mode=True, + pipeline=val_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(type='DOTAMetric', metric='mAP') +test_evaluator = val_evaluator + +# inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# batch_size=8, +# num_workers=8, +# persistent_workers=False, +# drop_last=False, +# sampler=dict(type='DefaultSampler', shuffle=False), +# dataset=dict( +# type=dataset_type, +# data_root=data_root, +# data_prefix=dict(img_path='test/images/'), +# img_shape=(1024, 1024), +# test_mode=True, +# pipeline=test_pipeline)) +# test_evaluator = dict( +# type='DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix='./work_dirs/rtmdet_r/Task1') diff --git a/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py b/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py deleted file mode 100644 index 519c9c1dd..000000000 --- a/configs/rotated_rtmdet/_base_/hrsc_mosaic_mixup.py +++ /dev/null @@ -1,132 +0,0 @@ -# dataset settings -dataset_type = 'HRSCDataset' -data_root = '/home/wangchen/liuyanyi/datasets/hrsc/' -file_client_args = dict(backend='disk') - -train_pipeline = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.CachedMosaic', - img_scale=(800, 800), - pad_val=114.0, - max_cached_images=20, - random_pop=False), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(1600, 1600), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='RandomRotate', prob=0.5, angle_range=180), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict( - type='mmdet.RandomFlip', - prob=0.75, - direction=['horizontal', 'vertical', 'diagonal']), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.CachedMixUp', - img_scale=(800, 800), - ratio_range=(1.0, 1.0), - max_cached_images=10, - random_pop=False, - pad_val=(114, 114, 114), - prob=0.5), - dict(type='mmdet.PackDetInputs') -] - -train_pipeline_stage2 = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), - dict( - type='mmdet.RandomResize', - resize_type='mmdet.Resize', - scale=(800, 800), - ratio_range=(0.5, 2.0), - keep_ratio=True), - dict(type='RandomRotate', prob=0.5, angle_range=180), - dict(type='mmdet.RandomCrop', crop_size=(800, 800)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict( - type='mmdet.RandomFlip', - prob=0.75, - direction=['horizontal', 'vertical', 'diagonal']), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict(type='mmdet.PackDetInputs') -] - -val_pipeline = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), - # avoid bboxes being resized - dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), - dict(type='ConvertBoxType', 
box_type_mapping=dict(gt_bboxes='rbox')), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -test_pipeline = [ - dict(type='mmdet.LoadImageFromFile', file_client_args=file_client_args), - dict(type='mmdet.Resize', scale=(800, 800), keep_ratio=True), - dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) -] -train_dataloader = dict( - batch_size=8, - num_workers=8, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=True), - batch_sampler=None, - pin_memory=True, - dataset=dict( - type='RepeatDataset', - times=3, - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='ImageSets/trainval.txt', - data_prefix=dict(sub_data_root='FullDataSet/'), - filter_cfg=dict(filter_empty_gt=True), - pipeline=train_pipeline))) -val_dataloader = dict( - batch_size=1, - num_workers=2, - persistent_workers=True, - drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type=dataset_type, - data_root=data_root, - ann_file='ImageSets/test.txt', - data_prefix=dict(sub_data_root='FullDataSet/'), - test_mode=True, - pipeline=val_pipeline)) -test_dataloader = val_dataloader - -val_evaluator = dict( - type='DOTAMetric', - iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], - metric='mAP') -test_evaluator = val_evaluator - -custom_hooks = [ - dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=90, - switch_pipeline=train_pipeline_stage2) -] diff --git a/configs/rotated_rtmdet/_base_/hrsc_rr.py b/configs/rotated_rtmdet/_base_/hrsc_rr.py index 51e0b7b73..d2518ea39 100644 --- a/configs/rotated_rtmdet/_base_/hrsc_rr.py +++ b/configs/rotated_rtmdet/_base_/hrsc_rr.py @@ -1,6 +1,6 @@ # dataset settings dataset_type = 'HRSCDataset' -data_root = '/home/wangchen/liuyanyi/datasets/hrsc/' +data_root = 'data/hrsc/' file_client_args = dict(backend='disk') train_pipeline = [ @@ -69,17 +69,13 @@ pipeline=val_pipeline)) test_dataloader = val_dataloader -val_evaluator = dict( - type='DOTAMetric', - iou_thrs=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95], - metric='mAP') -test_evaluator = val_evaluator - -custom_hooks = [ +val_evaluator = [ dict( - type='EMAHook', - ema_type='mmdet.ExpMomentumEMA', - momentum=0.0002, - update_buffers=True, - priority=49) + type='DOTAMetric', + eval_mode='11points', + prefix='dota_ap07', + metric='mAP'), + dict( + type='DOTAMetric', eval_mode='area', prefix='dota_ap12', metric='mAP'), ] +test_evaluator = val_evaluator diff --git a/configs/rotated_rtmdet/_base_/schedule_100e.py b/configs/rotated_rtmdet/_base_/schedule_100e.py deleted file mode 100644 index c0ca01a0d..000000000 --- a/configs/rotated_rtmdet/_base_/schedule_100e.py +++ /dev/null @@ -1,34 +0,0 @@ -max_epochs = 100 -base_lr = 0.004 / 16 -interval = 20 - -train_cfg = dict( - type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=interval) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# learning rate -param_scheduler = [ - dict( - type='LinearLR', - start_factor=1.0e-5, - by_epoch=False, - begin=0, - end=1000), - dict( - # use cosine lr from 150 to 300 epoch - type='CosineAnnealingLR', - eta_min=base_lr * 0.05, - 
begin=max_epochs // 2, - end=max_epochs, - T_max=max_epochs // 2, - by_epoch=True, - convert_to_iter_based=True), -] - -# optimizer -optim_wrapper = dict( - type='OptimWrapper', - optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), - paramwise_cfg=dict( - norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py new file mode 100644 index 000000000..cad3da51f --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py @@ -0,0 +1,185 @@ +_base_ = [ + './_base_/default_runtime.py', './_base_/schedule_3x.py', + './_base_/dota_rr.py' +] +checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + +angle_version = 'le90' +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + num_classes=15, + in_channels=256, + stacked_convs=2, + feat_channels=256, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + loss_angle=None, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) + +train_pipeline = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.CachedMosaic', img_scale=(1024, 1024), pad_val=114.0), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(2048, 2048), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict( + type='RandomRotate', + prob=0.5, + angle_range=180, + rect_obj_labels=[9, 11]), + dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(1024, 1024), + ratio_range=(1.0, 1.0), + 
max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1024, 1024), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict( + type='RandomRotate', + prob=0.5, + angle_range=180, + rect_obj_labels=[9, 11]), + dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +# batch_size = (2 GPUs) x (4 samples per GPU) = 8 +train_dataloader = dict( + batch_size=4, num_workers=4, dataset=dict(pipeline=train_pipeline)) + +max_epochs = 100 +stage2_num_epochs = 10 +base_lr = 0.004 / 16 +interval = 20 + +train_cfg = dict(max_epochs=max_epochs, val_interval=interval) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +custom_hooks = [ + dict(type='mmdet.NumClassCheckHook'), + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py new file mode 100644 index 000000000..eb178c0a1 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py @@ -0,0 +1,183 @@ +_base_ = [ + './_base_/default_runtime.py', './_base_/schedule_3x.py', + './_base_/hrsc_rr.py' +] +checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + +angle_version = 'le90' +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + num_classes=1, + in_channels=256, + stacked_convs=2, + feat_channels=256, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + 
loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + loss_angle=None, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) + +train_pipeline = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.CachedMosaic', img_scale=(800, 800), pad_val=114.0), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict( + type='RandomRotate', + prob=0.5, + angle_range=180, + rect_obj_labels=[9, 11]), + dict(type='mmdet.RandomCrop', crop_size=(1024, 1024)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=20, + pad_val=(114, 114, 114)), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(800, 800), + ratio_range=(0.1, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +# batch_size = (1 GPUs) x (8 samples per GPU) = 8 +train_dataloader = dict( + batch_size=8, + num_workers=8, + dataset=dict(dataset=dict(pipeline=train_pipeline))) + +# training schedule, hrsc dataset is repeated 3 times, in +# `./_base_/hrsc_rr.py`, so the actual epoch = 100 * 3 = 300 +max_epochs = 100 +stage2_num_epochs = 10 + +# hrsc dataset use larger learning rate for better performance +base_lr = 0.004 / 2 +interval = 20 + +train_cfg = dict(max_epochs=max_epochs, val_interval=interval) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +custom_hooks = [ + dict(type='mmdet.NumClassCheckHook'), + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] 
diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py index 513494c00..e451f9fd0 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py @@ -2,6 +2,8 @@ './_base_/default_runtime.py', './_base_/schedule_3x.py', './_base_/dota_rr.py' ] +checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + angle_version = 'le90' model = dict( type='mmdet.RTMDet', @@ -20,7 +22,9 @@ widen_factor=1, channel_attention=True, norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), neck=dict( type='mmdet.CSPNeXtPAFPN', in_channels=[256, 512, 1024], @@ -31,7 +35,6 @@ act_cfg=dict(type='SiLU')), bbox_head=dict( type='RotatedRTMDetSepBNHead', - # type='RTMDetSepBNHead', num_classes=15, in_channels=256, stacked_convs=2, @@ -53,11 +56,7 @@ pred_kernel_size=1, use_hbbox_loss=False, scale_angle=False, - # angle_coder=dict( - # type='DistributionAngleCoder', - # angle_version='le90'), loss_angle=None, - # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU')), train_cfg=dict( @@ -75,3 +74,6 @@ nms=dict(type='nms_rotated', iou_threshold=0.1), max_per_img=2000), ) + +# batch_size = (2 GPUs) x (4 samples per GPU) = 8 +train_dataloader = dict(batch_size=4, num_workers=4) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py similarity index 83% rename from configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py rename to configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py index f28d27617..91d07d543 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py @@ -1,7 +1,9 @@ _base_ = [ './_base_/default_runtime.py', './_base_/schedule_3x.py', - './_base_/hrsc_rr.py' + './_base_/dota_rr_ms.py' ] +checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + angle_version = 'le90' model = dict( type='mmdet.RTMDet', @@ -20,7 +22,9 @@ widen_factor=1, channel_attention=True, norm_cfg=dict(type='SyncBN'), - act_cfg=dict(type='SiLU')), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), neck=dict( type='mmdet.CSPNeXtPAFPN', in_channels=[256, 512, 1024], @@ -31,8 +35,7 @@ act_cfg=dict(type='SiLU')), bbox_head=dict( type='RotatedRTMDetSepBNHead', - # type='RTMDetSepBNHead', - num_classes=1, + num_classes=15, in_channels=256, stacked_convs=2, feat_channels=256, @@ -53,11 +56,7 @@ pred_kernel_size=1, use_hbbox_loss=False, scale_angle=False, - # angle_coder=dict( - # type='DistributionAngleCoder', - # angle_version='le90'), loss_angle=None, - # loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), norm_cfg=dict(type='SyncBN'), act_cfg=dict(type='SiLU')), train_cfg=dict( @@ -75,3 +74,6 @@ nms=dict(type='nms_rotated', iou_threshold=0.1), max_per_img=2000), ) + +# batch_size = (2 GPUs) x (4 samples per GPU) = 8 +train_dataloader = dict(batch_size=4, num_workers=4) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py new file mode 100644 index 000000000..a2b45734f --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py @@ -0,0 
+1,105 @@ +_base_ = [ + './_base_/default_runtime.py', './_base_/schedule_3x.py', + './_base_/hrsc_rr.py' +] +checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + +angle_version = 'le90' +model = dict( + type='mmdet.RTMDet', + data_preprocessor=dict( + type='mmdet.DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False, + boxtype2tensor=False, + batch_augments=None), + backbone=dict( + type='mmdet.CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=1, + widen_factor=1, + channel_attention=True, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU'), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict( + type='mmdet.CSPNeXtPAFPN', + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + bbox_head=dict( + type='RotatedRTMDetSepBNHead', + num_classes=1, + in_channels=256, + stacked_convs=2, + feat_channels=256, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + exp_on_reg=True, + share_conv=True, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + loss_angle=None, + norm_cfg=dict(type='SyncBN'), + act_cfg=dict(type='SiLU')), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=2000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=2000), +) + +# training schedule, hrsc dataset is repeated 3 times, in +# `./_base_/hrsc_rr.py`, so the actual epoch = 3 * 3 * 12 = 9 * 12 +max_epochs = 3 * 12 + +# hrsc dataset use larger learning rate for better performance +base_lr = 0.004 / 2 + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=1.0e-5, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# optimizer +optim_wrapper = dict(optimizer=dict(lr=base_lr)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py b/configs/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py new file mode 100644 index 000000000..be32c8c98 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py @@ -0,0 +1,17 @@ +_base_ = './rotated_rtmdet_l-3x-dota_ms.py' + +coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_l_8xb32-300e_coco/rtmdet_l_8xb32-300e_coco_20220719_112030-5a0be7c4.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=coco_ckpt)), + neck=dict( + init_cfg=dict(type='Pretrained', prefix='neck.', + checkpoint=coco_ckpt)), + bbox_head=dict( + init_cfg=dict( + type='Pretrained', prefix='bbox_head.', checkpoint=coco_ckpt))) + +# batch_size = (2 GPUs) x (4 samples per GPU) = 
8 +train_dataloader = dict(batch_size=4, num_workers=4) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py new file mode 100644 index 000000000..3118ce98a --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py @@ -0,0 +1,18 @@ +_base_ = './rotated_rtmdet_l-3x-dota.py' + +checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa + +model = dict( + backbone=dict( + deepen_factor=0.67, + widen_factor=0.75, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2), + bbox_head=dict( + in_channels=192, + feat_channels=192, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0))) + +# batch_size = (1 GPUs) x (8 samples per GPU) = 8 +train_dataloader = dict(batch_size=8, num_workers=8) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py new file mode 100644 index 000000000..5182fa65a --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py @@ -0,0 +1,18 @@ +_base_ = './rotated_rtmdet_l-3x-dota_ms.py' + +checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa + +model = dict( + backbone=dict( + deepen_factor=0.67, + widen_factor=0.75, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2), + bbox_head=dict( + in_channels=192, + feat_channels=192, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0))) + +# batch_size = (1 GPUs) x (8 samples per GPU) = 8 +train_dataloader = dict(batch_size=8, num_workers=8) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py deleted file mode 100644 index 1225f5119..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-hrsc.py +++ /dev/null @@ -1,7 +0,0 @@ -_base_ = './rotated_rtmdet_l-3x-hrsc.py' - -# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa -model = dict( - backbone=dict(deepen_factor=0.67, widen_factor=0.75), - neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2), - bbox_head=dict(in_channels=192, feat_channels=192)) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota.py similarity index 54% rename from configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py rename to configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota.py index 0b55a5b5d..41cb55b13 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_s-3x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota.py @@ -1,4 +1,4 @@ -_base_ = './rotated_rtmdet_l-3x-hrsc.py' +_base_ = './rotated_rtmdet_l-3x-dota.py' checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa @@ -9,4 +9,12 @@ init_cfg=dict( type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), - bbox_head=dict(in_channels=128, feat_channels=128, exp_on_reg=False)) + bbox_head=dict( + in_channels=128, + feat_channels=128, + exp_on_reg=False, + loss_bbox=dict(type='RotatedIoULoss', 
mode='linear', loss_weight=2.0), + )) + +# batch_size = (1 GPUs) x (8 samples per GPU) = 8 +train_dataloader = dict(batch_size=8, num_workers=8) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms.py b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms.py new file mode 100644 index 000000000..5574e34fb --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms.py @@ -0,0 +1,20 @@ +_base_ = './rotated_rtmdet_l-3x-dota_ms.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa + +model = dict( + backbone=dict( + deepen_factor=0.33, + widen_factor=0.5, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), + bbox_head=dict( + in_channels=128, + feat_channels=128, + exp_on_reg=False, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + )) + +# batch_size = (1 GPUs) x (8 samples per GPU) = 8 +train_dataloader = dict(batch_size=8, num_workers=8) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-aug-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-aug-hrsc.py new file mode 100644 index 000000000..43a8d92f9 --- /dev/null +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-300e-aug-hrsc.py @@ -0,0 +1,92 @@ +_base_ = './rotated_rtmdet_l-300e-aug-hrsc.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +model = dict( + backbone=dict( + deepen_factor=0.167, + widen_factor=0.375, + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), + bbox_head=dict( + in_channels=96, + feat_channels=96, + exp_on_reg=False, + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), + )) + +train_pipeline = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.CachedMosaic', + img_scale=(800, 800), + pad_val=114.0, + max_cached_images=20, + random_pop=False), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(1600, 1600), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.CachedMixUp', + img_scale=(800, 800), + ratio_range=(1.0, 1.0), + max_cached_images=10, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict( + type='mmdet.LoadImageFromFile', + file_client_args={{_base_.file_client_args}}), + dict(type='mmdet.LoadAnnotations', with_bbox=True, box_type='qbox'), + dict(type='ConvertBoxType', box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + resize_type='mmdet.Resize', + scale=(800, 800), + ratio_range=(0.5, 2.0), + keep_ratio=True), + dict(type='RandomRotate', prob=0.5, angle_range=180), + dict(type='mmdet.RandomCrop', crop_size=(800, 800)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + 
type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=(800, 800), pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline))) +custom_hooks = [ + dict(type='mmdet.NumClassCheckHook'), + dict( + type='EMAHook', + ema_type='mmdet.ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=90, + switch_pipeline=train_pipeline_stage2) +] diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py index 77c81f587..fb573fba4 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py @@ -1,16 +1,13 @@ _base_ = './rotated_rtmdet_l-3x-dota.py' -coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa -# checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa model = dict( - init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), backbone=dict( deepen_factor=0.167, widen_factor=0.375, - # init_cfg=dict( - # type='Pretrained', prefix='backbone.', checkpoint=checkpoint) - ), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), neck=dict(in_channels=[96, 192, 384], out_channels=96, num_csp_blocks=1), bbox_head=dict( in_channels=96, @@ -18,3 +15,6 @@ exp_on_reg=False, loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), )) + +# batch_size = (1 GPUs) x (8 samples per GPU) = 8 +train_dataloader = dict(batch_size=8, num_workers=8) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms.py similarity index 59% rename from configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py rename to configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms.py index 2b4bf41ab..c422eedd1 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_tiny_probiou-3x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms.py @@ -1,10 +1,8 @@ -_base_ = './rotated_rtmdet_l-3x-hrsc.py' +_base_ = './rotated_rtmdet_l-3x-dota_ms.py' -# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa model = dict( - # init_cfg=dict(type='Pretrained', checkpoint=coco_ckpt), backbone=dict( deepen_factor=0.167, widen_factor=0.375, @@ -15,5 +13,8 @@ in_channels=96, feat_channels=96, exp_on_reg=False, - loss_bbox=dict(type='ProbIoULoss', mode='l1', loss_weight=2.0), + loss_bbox=dict(type='RotatedIoULoss', mode='linear', loss_weight=2.0), )) + +# batch_size = (1 GPUs) x (8 samples per GPU) = 8 +train_dataloader = dict(batch_size=8, num_workers=8) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc.py similarity index 93% rename from configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py rename to configs/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc.py index 
b1b7ed172..07b85fb5c 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc.py @@ -1,4 +1,4 @@ -_base_ = './rotated_rtmdet_l-3x-hrsc.py' +_base_ = './rotated_rtmdet_l-9x-hrsc.py' checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa diff --git a/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py deleted file mode 100644 index a3539c096..000000000 --- a/configs/rotated_rtmdet/rotated_rtmdet_x-3x-hrsc.py +++ /dev/null @@ -1,8 +0,0 @@ -_base_ = './rotated_rtmdet_l-3x-hrsc.py' - -# coco_ckpt = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/rtmdet_tiny_8xb32-300e_coco/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth' # noqa -model = dict( - backbone=dict(deepen_factor=1.33, widen_factor=1.25), - neck=dict( - in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4), - bbox_head=dict(in_channels=320, feat_channels=320)) From 04348cef24fed7494b97637cfe2762a4afb1f6a0 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 13:59:51 +0800 Subject: [PATCH 29/52] fix readme --- configs/rotated_rtmdet/README.md | 33 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 8e2bd6019..ae06a5cdb 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -14,26 +14,27 @@ Our tech-report will be released soon. ### DOTA-v1.0 -| Backbone | size | pretrain | schedule | Aug | mAP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :--: | :------: | :------: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :-----------------------: | -| RTMDet-tiny | 1024 | IMP | 3x | RR | 75.60 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](<>) \\ [log](<>) | -| RTMDet-tiny | 1024 | IMP | 3x | MS+RR | 79.82 | 4.88 | 20.45 | 4.46 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](<>) \\ [log](<>) | -| RTMDet-s | 1024 | IMP | 3x | RR | 76.93 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](<>) \\ [log](<>) | -| RTMDet-s | 1024 | IMP | 3x | MS+RR | 79.98 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](<>) \\ [log](<>) | -| RTMDet-m | 1024 | IMP | 3x | RR | 78.24 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](<>) \\ [log](<>) | -| RTMDet-m | 1024 | IMP | 3x | MS+RR | 80.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](<>) \\ [log](<>) | -| RTMDet-l | 1024 | IMP | 3x | RR | 78.85 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](<>) \\ [log](<>) | -| RTMDet-l | 1024 | IMP | 3x | MS+RR | 80.54 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](<>) \\ [log](<>) | -| RTMDet-l | 1024 | COP | 3x | MS+RR | 81.33 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](<>) \\ [log](<>) | +| Backbone | size | pretrain | Aug | mAP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :--: | :------: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :-----------------------: | +| RTMDet-tiny | 1024 | IMP | RR | 75.60 | 4.88 | 20.45 | 
4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](<>) \| [log](<>) | +| RTMDet-tiny | 1024 | IMP | MS+RR | 79.82 | 4.88 | 20.45 | 4.46 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](<>) \| [log](<>) | +| RTMDet-s | 1024 | IMP | RR | 76.93 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](<>) \| [log](<>) | +| RTMDet-s | 1024 | IMP | MS+RR | 79.98 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](<>) \| [log](<>) | +| RTMDet-m | 1024 | IMP | RR | 78.24 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](<>) \| [log](<>) | +| RTMDet-m | 1024 | IMP | MS+RR | 80.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](<>) \| [log](<>) | +| RTMDet-l | 1024 | IMP | RR | 78.85 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](<>) \| [log](<>) | +| RTMDet-l | 1024 | IMP | MS+RR | 80.54 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](<>) \| [log](<>) | +| RTMDet-l | 1024 | COP | MS+RR | 81.33 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](<>) \| [log](<>) | ### HRSC -| Backbone | size | pretrain | schedule | Aug | mAP07 | mAP12 | Params(M) | FLOPS(G) | Config | Download | -| :---------: | :--: | :------: | :------: | :-: | :---: | :---: | :-------: | :------: | :----------------------------------------: | :-----------------------: | -| RTMDet-tiny | 800 | IMP | 9x | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](<>) \\ [log](<>) | +| Backbone | size | pretrain | Aug | mAP07 | mAP12 | Params(M) | FLOPS(G) | Config | Download | +| :---------: | :--: | :------: | :-: | :---: | :---: | :-------: | :------: | :----------------------------------------: | :-----------------------: | +| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](<>) \| [log](<>) | **Note**: -1. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and +1. By default, DOTA-v1.0 dataset trained with 3x schedule and HRSC dataset trained with 9x schedule. +2. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS. -2. We also provide config with mixup and mosaic for longer schedule. +3. We also provide config with mixup and mosaic for longer schedule. From bcf9da76b3804637e96b171130b3b390eac6816e Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 14:03:26 +0800 Subject: [PATCH 30/52] fix readme --- configs/rotated_rtmdet/README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index ae06a5cdb..636770a90 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -28,13 +28,14 @@ Our tech-report will be released soon. 
### HRSC -| Backbone | size | pretrain | Aug | mAP07 | mAP12 | Params(M) | FLOPS(G) | Config | Download | -| :---------: | :--: | :------: | :-: | :---: | :---: | :-------: | :------: | :----------------------------------------: | :-----------------------: | -| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](<>) \| [log](<>) | +| Backbone | size | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | +| :---------: | :--: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :-----------------------: | +| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](<>) \| [log](<>) | **Note**: 1. By default, DOTA-v1.0 dataset trained with 3x schedule and HRSC dataset trained with 9x schedule. -2. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and +2. `IMP` means ImageNet pretrain, `COP` means COCO pretrain. +3. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS. -3. We also provide config with mixup and mosaic for longer schedule. +4. We also provide config with mixup and mosaic for longer schedule. From c570a5f13c654f395ea3629c354ab33773307239 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 14:06:49 +0800 Subject: [PATCH 31/52] clean --- configs/rotated_rtmdet/_base_/schedule_3x.py | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/rotated_rtmdet/_base_/schedule_3x.py b/configs/rotated_rtmdet/_base_/schedule_3x.py index 30f850b3d..110a8acc5 100644 --- a/configs/rotated_rtmdet/_base_/schedule_3x.py +++ b/configs/rotated_rtmdet/_base_/schedule_3x.py @@ -16,7 +16,6 @@ begin=0, end=1000), dict( - # use cosine lr from 150 to 300 epoch type='CosineAnnealingLR', eta_min=base_lr * 0.05, begin=max_epochs // 2, From 76d54e2b3d71ac9ae19caf871aaed99f5b771b70 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 14:23:36 +0800 Subject: [PATCH 32/52] add docstring --- .../models/dense_heads/rotated_rtmdet_head.py | 51 +++++++++++-------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/mmrotate/models/dense_heads/rotated_rtmdet_head.py b/mmrotate/models/dense_heads/rotated_rtmdet_head.py index e63473141..42b7b7770 100644 --- a/mmrotate/models/dense_heads/rotated_rtmdet_head.py +++ b/mmrotate/models/dense_heads/rotated_rtmdet_head.py @@ -30,10 +30,13 @@ class RotatedRTMDetHead(RTMDetHead): num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. - with_objectness (bool): Whether to add an objectness branch. - Defaults to True. - act_cfg (:obj:`ConfigDict` or dict): Config dict for activation layer. - Default: dict(type='ReLU') + angle_version (str): Angle representations. Defaults to 'le90'. + use_hbbox_loss (bool): If true, use horizontal bbox loss and + loss_angle should not be None. Default to False. + scale_angle (bool): If true, add scale to angle pred branch. + Default to True. + angle_coder (:obj:`ConfigDict` or dict): Config of angle coder. + loss_angle (:obj:`ConfigDict` or dict, Optional): Config of angle loss. """ def __init__(self, @@ -141,7 +144,10 @@ def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor, cls_score (Tensor): Box scores for each scale level Has shape (N, num_anchors * num_classes, H, W). 
bbox_pred (Tensor): Decoded bboxes for each scale
- level with shape (N, num_anchors * 4, H, W).
+ level with shape (N, num_anchors * 5, H, W) for rbox loss
+ or (N, num_anchors * 4, H, W) for hbox loss.
+ angle_pred (Tensor): Angle predictions for each scale
+ level with shape (N, num_anchors * angle_dim, H, W).
 labels (Tensor): Labels of each anchors with shape
 (N, num_total_anchors).
 label_weights (Tensor): Label weights of each anchor with shape
@@ -233,10 +239,12 @@
 
 Args:
 cls_scores (list[Tensor]): Box scores for each scale level
- Has shape (N, num_anchors * num_classes, H, W)
- bbox_preds (list[Tensor]): Decoded box for each scale
+ Has shape (N, num_anchors * num_classes, H, W).
+ bbox_preds (list[Tensor]): Box predictions for each scale
 level with shape (N, num_anchors * 4, H, W) in
- [tl_x, tl_y, br_x, br_y] format.
+ [t, b, l, r] format.
+ angle_preds (list[Tensor]): Angle predictions for each scale
+ level with shape (N, num_anchors * angle_dim, H, W).
 batch_gt_instances (list[:obj:`InstanceData`]): Batch of
 gt_instance. It usually includes ``bboxes`` and ``labels``
 attributes.
@@ -367,7 +375,7 @@
 - label_weights (Tensor): Label weights of all anchor in the
 image with shape (N,).
 - bbox_targets (Tensor): BBox targets of all anchors in the
- image with shape (N, 4).
+ image with shape (N, 5).
 - norm_alignment_metrics (Tensor): Normalized alignment metrics
 of all priors in the image with shape (N,).
 """
@@ -404,7 +412,6 @@
 if len(pos_inds) > 0:
 # point-based
 pos_bbox_targets = sampling_result.pos_gt_bboxes
- # TODO add arg angle_version
 pos_bbox_targets = pos_bbox_targets.regularize_boxes(
 self.angle_version)
 bbox_targets[pos_inds, :] = pos_bbox_targets
@@ -460,7 +467,7 @@
 scale levels, each is a 4D-tensor, has shape
 (batch_size, num_priors * 4, H, W).
 angle_preds (list[Tensor]): Box angle for each scale level
- with shape (N, num_points * encode_size, H, W)
+ with shape (N, num_points * angle_dim, H, W)
 score_factors (list[Tensor], optional): Score factor for
 all scale level, each is a 4D-tensor, has shape
 (batch_size, num_priors * 1, H, W). Defaults to None.
@@ -550,7 +557,7 @@
 all scale levels of a single image, each item has shape
 (num_priors * 4, H, W).
 angle_pred_list (list[Tensor]): Box angle for a single scale
- level with shape (N, num_points * encode_size, H, W).
+ level with shape (N, num_points * angle_dim, H, W).
 score_factor_list (list[Tensor]): Score factor from all
 scale levels of a single image, each item has shape
 (num_priors * 1, H, W).
@@ -605,7 +612,6 @@
 
 assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
 
- # dim = self.bbox_coder.encode_size
 bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
 angle_pred = angle_pred.permute(1, 2, 0).reshape(
 -1, self.angle_coder.encode_size)
@@ -682,11 +688,15 @@
 in_channels (int): Number of channels in the input feature map.
 share_conv (bool): Whether to share conv layers between stages.
 Defaults to True.
+ scale_angle (bool): Not supported in RotatedRTMDetSepBNHead.
+ Defaults to False.
 norm_cfg (:obj:`ConfigDict` or dict)): Config dict for normalization
 layer. Defaults to dict(type='BN', momentum=0.03, eps=0.001).
 act_cfg (:obj:`ConfigDict` or dict)): Config dict for activation
 layer. Defaults to dict(type='SiLU').
 pred_kernel_size (int): Kernel size of prediction layer. Defaults to 1. 
+ exp_on_reg (bool): Whether to apply exponential on bbox_pred. + Defaults to False. """ def __init__(self, @@ -698,7 +708,7 @@ def __init__(self, type='BN', momentum=0.03, eps=0.001), act_cfg: ConfigType = dict(type='SiLU'), pred_kernel_size: int = 1, - exp_on_reg=False, + exp_on_reg: bool = False, **kwargs) -> None: self.share_conv = share_conv self.exp_on_reg = exp_on_reg @@ -809,15 +819,16 @@ def forward(self, feats: Tuple[Tensor, ...]) -> tuple: Returns: tuple: Usually a tuple of classification scores and bbox prediction - - - cls_scores (tuple[Tensor]): Classification scores for all scale + - cls_scores (list[Tensor]): Classification scores for all scale levels, each is a 4D-tensor, the channels number is - num_anchors * num_classes. - - bbox_preds (tuple[Tensor]): Box energies / deltas for all scale + num_base_priors * num_classes. + - bbox_preds (list[Tensor]): Box energies / deltas for all scale levels, each is a 4D-tensor, the channels number is - num_anchors * 4. + num_base_priors * 4. + - angle_preds (list[Tensor]): Angle prediction for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * angle_dim. """ - cls_scores = [] bbox_preds = [] angle_preds = [] From ab1bca2a9a4592b23609bf8fc75abc5ea4ec342b Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 15:14:45 +0800 Subject: [PATCH 33/52] add unittest --- .../test_rotated_rtmdet_head.py | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py diff --git a/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py new file mode 100644 index 000000000..527133fc6 --- /dev/null +++ b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py @@ -0,0 +1,213 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import unittest + +import pytest +import torch +from mmdet.models import L1Loss +from mmengine.structures import InstanceData +from parameterized import parameterized + +from mmrotate.models.dense_heads import (RotatedRTMDetHead, + RotatedRTMDetSepBNHead) +from mmrotate.structures import RotatedBoxes +from mmrotate.utils import register_all_modules + + +class TestRotatedRTMDetHead(unittest.TestCase): + + def setUp(self): + register_all_modules() + + @pytest.mark.skipif( + not torch.cuda.is_available(), reason='requires CUDA support') + @parameterized.expand([(RotatedRTMDetHead, ), (RotatedRTMDetSepBNHead, )]) + def test_rotated_rtmdet_head_loss(self, head_cls): + """Tests rotated rtmdet head loss when truth is empty and non-empty.""" + angle_version = 'le90' + s = 256 + img_metas = [{ + 'img_shape': (s, s, 3), + 'pad_shape': (s, s, 3), + 'scale_factor': 1, + }] + rtm_head = head_cls( + num_classes=4, + in_channels=1, + feat_channels=1, + stacked_convs=1, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, + 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict( + type='RotatedIoULoss', mode='linear', loss_weight=2.0), + with_objectness=False, + pred_kernel_size=1, + use_hbbox_loss=False, + scale_angle=False, + loss_angle=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='SiLU'), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False)).cuda() + + # Rotated RTMDet head expects a multiple levels of features per image + feats = ( + torch.rand(1, 1, s // stride[1], s // stride[0]).cuda() + for stride in rtm_head.prior_generator.strides) + cls_scores, bbox_preds, angle_preds = rtm_head.forward(feats) + + # Test that empty ground truth encourages the network to + # predict background + gt_instances = InstanceData() + gt_instances.bboxes = torch.empty((0, 5)).cuda() + gt_instances.labels = torch.LongTensor([]).cuda() + + empty_gt_losses = rtm_head.loss_by_feat(cls_scores, bbox_preds, + angle_preds, [gt_instances], + img_metas) + # When there is no truth, the cls loss should be nonzero but + # box loss and centerness loss should be zero + empty_cls_loss = sum(empty_gt_losses['loss_cls']) + empty_box_loss = sum(empty_gt_losses['loss_bbox']) + self.assertGreater(empty_cls_loss, 0, 'cls loss should be non-zero') + self.assertEqual( + empty_box_loss, 0, + 'there should be no box loss when there are no true boxes') + + # When truth is non-empty then all cls, box loss and centerness loss + # should be nonzero for random inputs + gt_instances = InstanceData() + gt_instances.bboxes = RotatedBoxes( + torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, 0.2]]).cuda()) + gt_instances.labels = torch.LongTensor([2]).cuda() + + one_gt_losses = rtm_head.loss_by_feat(cls_scores, bbox_preds, + angle_preds, [gt_instances], + img_metas) + onegt_cls_loss = sum(one_gt_losses['loss_cls']) + onegt_box_loss = sum(one_gt_losses['loss_bbox']) + self.assertGreater(onegt_cls_loss, 0, 'cls loss should be non-zero') + self.assertGreater(onegt_box_loss, 0, 'box loss should be non-zero') + + # Test head with angle_loss + rtm_head.loss_angle = L1Loss(loss_weight=0.2) + with_ang_losses = rtm_head.loss_by_feat(cls_scores, bbox_preds, + angle_preds, [gt_instances], + img_metas) + 
with_ang_cls_loss = sum(with_ang_losses['loss_cls']) + with_ang_box_loss = sum(with_ang_losses['loss_bbox']) + with_ang_ang_loss = sum(with_ang_losses['loss_angle']) + + self.assertGreater(with_ang_cls_loss, 0, 'cls loss should be non-zero') + self.assertGreater(with_ang_box_loss, 0, 'box loss should be non-zero') + self.assertGreater(with_ang_ang_loss, 0, + 'angle loss should be non-zero') + + @parameterized.expand([(RotatedRTMDetHead, ), (RotatedRTMDetSepBNHead, )]) + def test_rotated_rtmdet_head_loss_with_hbb(self, head_cls): + """Tests rotated rtmdet head loss when truth is empty and non-empty.""" + angle_version = 'le90' + s = 256 + img_metas = [{ + 'img_shape': (s, s, 3), + 'pad_shape': (s, s, 3), + 'scale_factor': 1, + }] + rtm_head = head_cls( + num_classes=4, + in_channels=1, + feat_channels=1, + stacked_convs=1, + angle_version=angle_version, + anchor_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, + 32]), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox=dict(type='mmdet.IoULoss', loss_weight=1.0), + angle_coder=dict( + type='CSLCoder', + angle_version='le90', + omega=1, + window='gaussian', + radius=1), + loss_angle=dict( + type='SmoothFocalLoss', gamma=2.0, alpha=0.25, + loss_weight=0.2), + with_objectness=False, + pred_kernel_size=1, + use_hbbox_loss=True, + scale_angle=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='SiLU'), + train_cfg=dict( + assigner=dict( + type='mmdet.DynamicSoftLabelAssigner', + iou_calculator=dict(type='RBboxOverlaps2D'), + topk=13), + allowed_border=-1, + pos_weight=-1, + debug=False)) + + feats = ( + torch.rand(1, 1, s // stride[1], s // stride[0]) + for stride in rtm_head.prior_generator.strides) + cls_scores, bbox_preds, angle_preds = rtm_head.forward(feats) + + # Test that empty ground truth encourages the network to + # predict background + gt_instances = InstanceData() + gt_instances.bboxes = torch.empty((0, 5)) + gt_instances.labels = torch.LongTensor([]) + + empty_gt_losses = rtm_head.loss_by_feat(cls_scores, bbox_preds, + angle_preds, [gt_instances], + img_metas) + # When there is no truth, the cls loss should be nonzero but + # box loss and centerness loss should be zero + empty_cls_loss = sum(empty_gt_losses['loss_cls']) + empty_box_loss = sum(empty_gt_losses['loss_bbox']) + empty_ang_loss = sum(empty_gt_losses['loss_angle']) + self.assertGreater(empty_cls_loss, 0, 'cls loss should be non-zero') + self.assertEqual( + empty_box_loss, 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_ang_loss, 0, + 'there should be no angle loss when there are no true boxes') + + # When truth is non-empty then all cls, box loss and centerness loss + # should be nonzero for random inputs + gt_instances = InstanceData() + gt_instances.bboxes = RotatedBoxes( + torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, 0.2]])) + gt_instances.labels = torch.LongTensor([2]) + + one_gt_losses = rtm_head.loss_by_feat(cls_scores, bbox_preds, + angle_preds, [gt_instances], + img_metas) + onegt_cls_loss = sum(one_gt_losses['loss_cls']) + onegt_box_loss = sum(one_gt_losses['loss_bbox']) + onegt_ang_loss = sum(one_gt_losses['loss_angle']) + self.assertGreater(onegt_cls_loss, 0, 'cls loss should be non-zero') + self.assertGreater(onegt_box_loss, 0, 'box loss should be non-zero') + self.assertGreater(onegt_ang_loss, 0, 'angle loss should be non-zero') From 
0de8bb8189522d2a6040db1f0972e0d6369c9ac1 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 16:03:06 +0800 Subject: [PATCH 34/52] update metafile --- configs/rotated_rtmdet/README.md | 2 +- configs/rotated_rtmdet/metafile.yml | 135 ++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 configs/rotated_rtmdet/metafile.yml diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 636770a90..dfa276128 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -7,7 +7,7 @@ Our tech-report will be released soon.
## Results and Models diff --git a/configs/rotated_rtmdet/metafile.yml b/configs/rotated_rtmdet/metafile.yml new file mode 100644 index 000000000..cb481747c --- /dev/null +++ b/configs/rotated_rtmdet/metafile.yml @@ -0,0 +1,135 @@ +Collections: + - Name: rotated_rtmdet + Metadata: + Training Data: + - DOTAv1.0 + - HRSC + Training Techniques: + - AdamW + - Flat Cosine Annealing + Training Resources: 1x RTX3090 GPUs + Architecture: + - CSPNeXt + - CSPNeXtPAFPN + README: configs/rotated_rtmdet/README.md + +Models: + - Name: rotated_rtmdet_tiny-3x-dota + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 75.60 + Weights: todo + + - Name: rotated_rtmdet_tiny-3x-dota_ms + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 79.82 + Weights: todo + + - Name: rotated_rtmdet_s-3x-dota + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 76.93 + Weights: todo + + - Name: rotated_rtmdet_s-3x-dota_ms + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 79.98 + Weights: todo + + - Name: rotated_rtmdet_m-3x-dota + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 78.24 + Weights: todo + + - Name: rotated_rtmdet_m-3x-dota_ms + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 80.26 + Weights: todo + + - Name: rotated_rtmdet_l-3x-dota + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 78.85 + Weights: todo + + - Name: rotated_rtmdet_l-3x-dota_ms + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 80.54 + Weights: todo + + - Name: rotated_rtmdet_l-coco_pretrain-3x-dota_ms + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 81.33 + Weights: todo + + - Name: rotated_rtmdet_tiny-9x-hrsc + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/ + Metadata: + Training Data: HRSC + Results: + - Task: Oriented Object Detection + Dataset: HRSC + Metrics: + mAP: 90.6 + Weights: todo From 17eae1a152f8647851a8da4936a44f3315782d51 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 16:23:02 +0800 Subject: [PATCH 35/52] remove useless code --- .../models/task_modules/coders/__init__.py | 5 +-- 
.../models/task_modules/coders/angle_coder.py | 42 ------------------- 2 files changed, 2 insertions(+), 45 deletions(-) diff --git a/mmrotate/models/task_modules/coders/__init__.py b/mmrotate/models/task_modules/coders/__init__.py index 211c7e8c1..e50352579 100644 --- a/mmrotate/models/task_modules/coders/__init__.py +++ b/mmrotate/models/task_modules/coders/__init__.py @@ -1,6 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .angle_coder import (CSLCoder, DistributionAngleCoder, PSCCoder, - PseudoAngleCoder) +from .angle_coder import CSLCoder, PSCCoder, PseudoAngleCoder from .delta_midpointoffset_rbbox_coder import MidpointOffsetCoder from .delta_xywh_hbbox_coder import DeltaXYWHHBBoxCoder from .delta_xywh_qbbox_coder import DeltaXYWHQBBoxCoder @@ -13,5 +12,5 @@ 'DeltaXYWHTRBBoxCoder', 'DeltaXYWHTHBBoxCoder', 'MidpointOffsetCoder', 'GVFixCoder', 'GVRatioCoder', 'CSLCoder', 'PSCCoder', 'DistanceAnglePointCoder', 'DeltaXYWHHBBoxCoder', 'DeltaXYWHQBBoxCoder', - 'PseudoAngleCoder', 'DistributionAngleCoder' + 'PseudoAngleCoder' ] diff --git a/mmrotate/models/task_modules/coders/angle_coder.py b/mmrotate/models/task_modules/coders/angle_coder.py index 2158f33ba..4ac9f3b40 100644 --- a/mmrotate/models/task_modules/coders/angle_coder.py +++ b/mmrotate/models/task_modules/coders/angle_coder.py @@ -1,9 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import math -import numpy as np import torch -import torch.nn.functional as F from mmdet.models.task_modules.coders.base_bbox_coder import BaseBBoxCoder from torch import Tensor @@ -263,43 +261,3 @@ def decode(self, angle_preds: Tensor, keepdim: bool = False) -> Tensor: return angle_preds else: return angle_preds.squeeze(-1) - - -@TASK_UTILS.register_module() -class DistributionAngleCoder(BaseBBoxCoder): - """Distribution representation for angle. - - Args: - angle_version (str): Angle definition. - reg_max (int): Max value of integral. Defaults to 16. - """ - - def __init__(self, angle_version: str = 'le90', reg_max: int = 16): - super().__init__() - self.angle_range = 0.5 * np.pi if angle_version == 'oc' else np.pi - self.angle_offset_dict = { - 'oc': 0, - 'le90': 0.5 * np.pi, - 'le135': 0.25 * np.pi - } - self.angle_offset = self.angle_offset_dict[angle_version] - self.reg_max = reg_max - self.encode_size = reg_max + 1 - self.project = torch.linspace(0, self.reg_max, self.reg_max + 1) - - def encode(self, angle: Tensor) -> Tensor: - # Norm to (0~1)*reg_max - dfl_target = self.reg_max * (self.angle_offset + - angle) / self.angle_range - return dfl_target.flatten() - - def decode(self, angle: Tensor, keepdim: bool = False) -> Tensor: - decode_angle = F.softmax(angle.reshape(-1, self.reg_max + 1), dim=-1) - decode_angle = F.linear(decode_angle, self.project.type_as(angle)) - if keepdim: - decode_angle = decode_angle.reshape(*angle.shape[:-1], 1) - else: - decode_angle = decode_angle.reshape(-1) - decode_angle = self.angle_offset * decode_angle / self.reg_max - decode_angle = decode_angle - self.angle_offset - return decode_angle From 5721edf120806ed9156135541df1e16115f0c8c0 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 16:51:58 +0800 Subject: [PATCH 36/52] add img height --- configs/rotated_rtmdet/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index dfa276128..2df96c518 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -7,7 +7,7 @@ Our tech-report will be released soon.
## Results and Models From 40663f030d6500d4f89a7862af7d0e56be42f4e0 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Thu, 1 Dec 2022 17:03:16 +0800 Subject: [PATCH 37/52] typo --- configs/rotated_rtmdet/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 2df96c518..335ea7223 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -17,7 +17,7 @@ Our tech-report will be released soon. | Backbone | size | pretrain | Aug | mAP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | | :---------: | :--: | :------: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :-----------------------: | | RTMDet-tiny | 1024 | IMP | RR | 75.60 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](<>) \| [log](<>) | -| RTMDet-tiny | 1024 | IMP | MS+RR | 79.82 | 4.88 | 20.45 | 4.46 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](<>) \| [log](<>) | +| RTMDet-tiny | 1024 | IMP | MS+RR | 79.82 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](<>) \| [log](<>) | | RTMDet-s | 1024 | IMP | RR | 76.93 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](<>) \| [log](<>) | | RTMDet-s | 1024 | IMP | MS+RR | 79.98 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](<>) \| [log](<>) | | RTMDet-m | 1024 | IMP | RR | 78.24 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](<>) \| [log](<>) | From b5599f1f8e578d349ef2a61b79f7656281bcbae6 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 2 Dec 2022 09:10:27 +0800 Subject: [PATCH 38/52] update results --- configs/rotated_rtmdet/README.md | 35 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 335ea7223..e7ff6a283 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -14,28 +14,29 @@ Our tech-report will be released soon. 
### DOTA-v1.0 -| Backbone | size | pretrain | Aug | mAP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :--: | :------: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :-----------------------: | -| RTMDet-tiny | 1024 | IMP | RR | 75.60 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](<>) \| [log](<>) | -| RTMDet-tiny | 1024 | IMP | MS+RR | 79.82 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](<>) \| [log](<>) | -| RTMDet-s | 1024 | IMP | RR | 76.93 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](<>) \| [log](<>) | -| RTMDet-s | 1024 | IMP | MS+RR | 79.98 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](<>) \| [log](<>) | -| RTMDet-m | 1024 | IMP | RR | 78.24 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](<>) \| [log](<>) | -| RTMDet-m | 1024 | IMP | MS+RR | 80.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](<>) \| [log](<>) | -| RTMDet-l | 1024 | IMP | RR | 78.85 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](<>) \| [log](<>) | -| RTMDet-l | 1024 | IMP | MS+RR | 80.54 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](<>) \| [log](<>) | -| RTMDet-l | 1024 | COP | MS+RR | 81.33 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](<>) \| [log](<>) | +| Backbone | size | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :--: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 1024 | IMP | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](<>) | +| RTMDet-tiny | 1024 | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](<>) | +| RTMDet-s | 1024 | IMP | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](<>) | +| RTMDet-s | 1024 | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](<>) | +| RTMDet-m | 1024 | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](<>) | +| RTMDet-m | 1024 | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](<>) | +| RTMDet-l | 1024 | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | 
[model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](<>) | +| RTMDet-l | 1024 | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) \| [log](<>) | +| RTMDet-l | 1024 | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](<>) | ### HRSC -| Backbone | size | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | -| :---------: | :--: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :-----------------------: | -| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](<>) \| [log](<>) | +| Backbone | size | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | +| :---------: | :--: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-20221125_145920.json) | **Note**: 1. By default, DOTA-v1.0 dataset trained with 3x schedule and HRSC dataset trained with 9x schedule. -2. `IMP` means ImageNet pretrain, `COP` means COCO pretrain. -3. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and +2. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. +3. `IMP` means ImageNet pretrain, `COP` means COCO pretrain. +4. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS. -4. We also provide config with mixup and mosaic for longer schedule. +5. We also provide config with mixup and mosaic for longer schedule. From 22fd3e2c8596637ada65e9bae8fbcb5c3fe11c32 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 2 Dec 2022 09:11:48 +0800 Subject: [PATCH 39/52] fix table --- configs/rotated_rtmdet/README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index e7ff6a283..5b1e489b5 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -14,23 +14,23 @@ Our tech-report will be released soon. 
### DOTA-v1.0 -| Backbone | size | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :--: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 1024 | IMP | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](<>) | -| RTMDet-tiny | 1024 | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](<>) | -| RTMDet-s | 1024 | IMP | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](<>) | -| RTMDet-s | 1024 | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](<>) | -| RTMDet-m | 1024 | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](<>) | -| RTMDet-m | 1024 | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](<>) | -| RTMDet-l | 1024 | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](<>) | -| RTMDet-l | 1024 | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) \| [log](<>) | -| RTMDet-l | 1024 | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](<>) | +| Backbone | size | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :--: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 1024 | IMP | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](<>) | +| RTMDet-tiny | 1024 | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](<>) | +| RTMDet-s | 1024 | IMP | 
RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](<>) | +| RTMDet-s | 1024 | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](<>) | +| RTMDet-m | 1024 | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](<>) | +| RTMDet-m | 1024 | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](<>) | +| RTMDet-l | 1024 | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](<>) | +| RTMDet-l | 1024 | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) \| [log](<>) | +| RTMDet-l | 1024 | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](<>) | ### HRSC -| Backbone | size | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | -| :---------: | :--: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-20221125_145920.json) | +| Backbone | size | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | +| :---------: | :--: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-20221125_145920.json) | **Note**: From 31eb911503d7c13dad699f4f18ea96a6fa68ff4e Mon Sep 17 
00:00:00 2001 From: liuyanyi Date: Fri, 2 Dec 2022 09:16:53 +0800 Subject: [PATCH 40/52] optimize tables --- configs/rotated_rtmdet/README.md | 41 +++++++++++++++++--------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 5b1e489b5..1e12511c9 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -14,29 +14,32 @@ Our tech-report will be released soon. ### DOTA-v1.0 -| Backbone | size | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :--: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 1024 | IMP | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](<>) | -| RTMDet-tiny | 1024 | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](<>) | -| RTMDet-s | 1024 | IMP | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](<>) | -| RTMDet-s | 1024 | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](<>) | -| RTMDet-m | 1024 | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](<>) | -| RTMDet-m | 1024 | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](<>) | -| RTMDet-l | 1024 | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](<>) | -| RTMDet-l | 1024 | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) \| [log](<>) | -| RTMDet-l | 1024 | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](<>) | +| Backbone | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | IMP | RR | 47.37 | 75.36 | 
50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](<>) | +| RTMDet-tiny | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](<>) | +| RTMDet-s | IMP | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](<>) | +| RTMDet-s | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](<>) | +| RTMDet-m | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](<>) | +| RTMDet-m | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](<>) | +| RTMDet-l | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](<>) | +| RTMDet-l | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) \| [log](<>) | +| RTMDet-l | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](<>) | + +- By default, DOTA-v1.0 dataset trained with 3x schedule and image size 1024\*1024. 
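Below is a minimal inference sketch for the checkpoints listed above. It assumes mmdet's high-level API (`init_detector` / `inference_detector`) together with mmrotate's `register_all_modules`; the local checkpoint filename and the demo image path are placeholders rather than release artifacts.

```python
# A minimal sketch (not part of the release files above): run one of the
# rotated RTMDet checkpoints on a single image. Paths are placeholders.
from mmdet.apis import inference_detector, init_detector

from mmrotate.utils import register_all_modules

# Register mmrotate modules (heads, angle coders, datasets) so the config
# can be parsed and the model built.
register_all_modules()

config_file = 'configs/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota.py'
checkpoint_file = 'rotated_rtmdet_tiny-3x-dota.pth'  # placeholder local path

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')  # placeholder image path

# result.pred_instances holds rotated boxes (cx, cy, w, h, t) in the le90
# definition, together with per-box scores and labels.
print(len(result.pred_instances), 'detections')
```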
### HRSC -| Backbone | size | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | -| :---------: | :--: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 800 | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-20221125_145920.json) | +| Backbone | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | +| :---------: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-20221125_145920.json) | + +- By default, HRSC dataset trained with 9x schedule and image size 800\*800. **Note**: -1. By default, DOTA-v1.0 dataset trained with 3x schedule and HRSC dataset trained with 9x schedule. -2. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. -3. `IMP` means ImageNet pretrain, `COP` means COCO pretrain. -4. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and +1. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. +2. `IMP` means ImageNet pretrain, `COP` means COCO pretrain. +3. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS. -5. We also provide config with mixup and mosaic for longer schedule. +4. We also provide config with mixup and mosaic for longer schedule. From 2fcf751e3abb3b523851fb45b7ce09d760e8d40b Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 2 Dec 2022 14:32:13 +0800 Subject: [PATCH 41/52] add --- configs/rotated_rtmdet/README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 1e12511c9..44c9f2652 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -14,17 +14,17 @@ Our tech-report will be released soon. 
### DOTA-v1.0 -| Backbone | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | IMP | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](<>) | -| RTMDet-tiny | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](<>) | -| RTMDet-s | IMP | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](<>) | -| RTMDet-s | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](<>) | -| RTMDet-m | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](<>) | -| RTMDet-m | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](<>) | -| RTMDet-l | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](<>) | -| RTMDet-l | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) \| [log](<>) | -| RTMDet-l | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](<>) | +| Backbone | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | IMP | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) | +| RTMDet-tiny | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | 
[model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) | +| RTMDet-s | IMP | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) | +| RTMDet-s | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) | +| RTMDet-m | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) | +| RTMDet-m | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) | +| RTMDet-l | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) | +| RTMDet-l | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) | +| RTMDet-l | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) | - By default, DOTA-v1.0 dataset trained with 3x schedule and image size 1024\*1024. From b454514dd05047a80a10c8f6e52e859927c99c71 Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 2 Dec 2022 16:00:23 +0800 Subject: [PATCH 42/52] fix config --- configs/rotated_rtmdet/README.md | 10 ++++++++++ .../rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 44c9f2652..a005a7ec8 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -36,6 +36,16 @@ Our tech-report will be released soon. - By default, HRSC dataset trained with 9x schedule and image size 800\*800. +### Stronger augmentation + +We also provide configs with Mixup and Mosaic. Training time is less than MS. 
+ +DOTA: + +| Backbone | pretrain | schedule | Aug | mmAP | mAP50 | mAP75 | Config | Download | +| :------: | :------: | :------: | :-------------: | :---: | :---: | :---: | :-------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-l | IMP | 100e | Mixup+Mosaic+RR | 54.59 | 80.16 | 61.16 | [config](./rotated_rtmdet_l-100e-aug-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/) | + **Note**: 1. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py index cad3da51f..fd83a62dd 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py @@ -121,7 +121,7 @@ type='mmdet.RandomResize', resize_type='mmdet.Resize', scale=(1024, 1024), - ratio_range=(0.5, 2.0), + ratio_range=(0.1, 2.0), keep_ratio=True), dict( type='RandomRotate', From 8acca42347a17484906511fe80db29754353992b Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Fri, 2 Dec 2022 16:23:54 +0800 Subject: [PATCH 43/52] upload --- configs/rotated_rtmdet/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index a005a7ec8..9bbcfc1cf 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -32,19 +32,19 @@ Our tech-report will be released soon. | Backbone | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | | :---------: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-20221125_145920.json) | +| RTMDet-tiny | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc_20221125_145920.json) | - By default, HRSC dataset trained with 9x schedule and image size 800\*800. ### Stronger augmentation -We also provide configs with Mixup and Mosaic. Training time is less than MS. +We also provide configs with Mixup, Mosaic and RandomRotate. Training time is less than MS. 
DOTA: -| Backbone | pretrain | schedule | Aug | mmAP | mAP50 | mAP75 | Config | Download | -| :------: | :------: | :------: | :-------------: | :---: | :---: | :---: | :-------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-l | IMP | 100e | Mixup+Mosaic+RR | 54.59 | 80.16 | 61.16 | [config](./rotated_rtmdet_l-100e-aug-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/) | +| Backbone | pretrain | schedule | Aug | mmAP | mAP50 | mAP75 | Config | Download | +| :------: | :------: | :------: | :-------------: | :---: | :---: | :---: | :-------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-l | IMP | 100e | Mixup+Mosaic+RR | 54.59 | 80.16 | 61.16 | [config](./rotated_rtmdet_l-100e-aug-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/rotated_rtmdet_l-100e-aug-dota-bc59fd88.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/rotated_rtmdet_l-100e-aug-dota_20221124_224135.json) | **Note**: From 2e3529da52abf3686be36c04ea128beca0bf290b Mon Sep 17 00:00:00 2001 From: liuyanyi Date: Wed, 7 Dec 2022 20:23:33 +0800 Subject: [PATCH 44/52] update pth link --- configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py index fd83a62dd..329e7bb36 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py @@ -2,7 +2,7 @@ './_base_/default_runtime.py', './_base_/schedule_3x.py', './_base_/dota_rr.py' ] -checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa angle_version = 'le90' model = dict( diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py index eb178c0a1..f7c44d06d 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-300e-aug-hrsc.py @@ -2,7 +2,7 @@ './_base_/default_runtime.py', './_base_/schedule_3x.py', './_base_/hrsc_rr.py' ] -checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa 
+checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa angle_version = 'le90' model = dict( diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py index e451f9fd0..7587fbb19 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py @@ -2,7 +2,7 @@ './_base_/default_runtime.py', './_base_/schedule_3x.py', './_base_/dota_rr.py' ] -checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa angle_version = 'le90' model = dict( diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py index 91d07d543..d5395b501 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms.py @@ -2,7 +2,7 @@ './_base_/default_runtime.py', './_base_/schedule_3x.py', './_base_/dota_rr_ms.py' ] -checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa angle_version = 'le90' model = dict( diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py index a2b45734f..76659ef59 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py @@ -2,7 +2,7 @@ './_base_/default_runtime.py', './_base_/schedule_3x.py', './_base_/hrsc_rr.py' ] -checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa angle_version = 'le90' model = dict( diff --git a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py index 3118ce98a..b341449f8 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota.py @@ -1,6 +1,6 @@ _base_ = './rotated_rtmdet_l-3x-dota.py' -checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa model = dict( backbone=dict( diff --git a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py index 5182fa65a..eb4326606 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms.py @@ -1,6 +1,6 @@ _base_ = './rotated_rtmdet_l-3x-dota_ms.py' -checkpoint = './work_dirs/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa model = dict( backbone=dict( From 
6ed8d1c454726e8e41cd3b03a88d2ec4edb7573b Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 15 Dec 2022 18:05:01 +0800 Subject: [PATCH 45/52] add readme pth --- configs/rotated_rtmdet/README.md | 34 ++++++++++++++--------------- configs/rotated_rtmdet/metafile.yml | 32 ++++++++++++++++++--------- 2 files changed, 38 insertions(+), 28 deletions(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 9bbcfc1cf..97df01810 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -14,17 +14,17 @@ Our tech-report will be released soon. ### DOTA-v1.0 -| Backbone | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | IMP | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/) | -| RTMDet-tiny | IMP | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/) | -| RTMDet-s | IMP | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/) | -| RTMDet-s | IMP | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/) | -| RTMDet-m | IMP | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/) | -| RTMDet-m | IMP | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/) | -| RTMDet-l | IMP | RR | 51.01 | 78.85 | 55.21 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/) | -| RTMDet-l | IMP | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) 
\| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/) | -| RTMDet-l | COP | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/) | +| Backbone | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | IN | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/rotated_rtmdet_tiny-3x-dota-9d821076.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/rotated_rtmdet_tiny-3x-dota_20221201_120814.json) | +| RTMDet-tiny | IN | MS+RR | 53.59 | 79.82 | 58.87 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/rotated_rtmdet_tiny-3x-dota_ms-f12286ff.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/rotated_rtmdet_tiny-3x-dota_ms_20221113_201235.log) | +| RTMDet-s | IN | RR | 48.16 | 76.93 | 50.59 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/rotated_rtmdet_s-3x-dota-11f6ccf5.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/rotated_rtmdet_s-3x-dota_20221124_081442.json) | +| RTMDet-s | IN | MS+RR | 54.43 | 79.98 | 60.07 | 8.86 | 37.62 | 4.86 | [config](./rotated_rtmdet_s-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/rotated_rtmdet_s-3x-dota_ms-20ead048.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/rotated_rtmdet_s-3x-dota_ms_20221113_201055.json) | +| RTMDet-m | IN | RR | 50.56 | 78.24 | 54.47 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/rotated_rtmdet_m-3x-dota-beeadda6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/rotated_rtmdet_m-3x-dota_20221122_011234.json) | +| RTMDet-m | IN | MS+RR | 55.00 | 80.26 | 61.26 | 24.67 | 99.76 | 7.82 | [config](./rotated_rtmdet_m-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/rotated_rtmdet_m-3x-dota_ms-c71eb375.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/rotated_rtmdet_m-3x-dota_ms_20221122_011234.json) | +| RTMDet-l | IN | RR | 51.01 | 78.85 | 55.21 | 
52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/rotated_rtmdet_l-3x-dota-23992372.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/rotated_rtmdet_l-3x-dota_20221122_011241.json) | +| RTMDet-l | IN | MS+RR | 55.52 | 80.54 | 61.47 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/rotated_rtmdet_l-3x-dota_ms-2738da34.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/rotated_rtmdet_l-3x-dota_ms_20221122_011241.json) | +| RTMDet-l | COCO | MS+RR | 56.74 | 81.33 | 63.45 | 52.27 | 204.21 | 10.82 | [config](./rotated_rtmdet_l-coco_pretrain-3x-dota_ms.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/rotated_rtmdet_l-coco_pretrain-3x-dota_ms-06d248a2.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/rotated_rtmdet_l-coco_pretrain-3x-dota_ms_20221113_202010.json) | - By default, DOTA-v1.0 dataset trained with 3x schedule and image size 1024\*1024. @@ -32,24 +32,22 @@ Our tech-report will be released soon. | Backbone | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | | :---------: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | IMP | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc_20221125_145920.json) | +| RTMDet-tiny | IN | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc_20221125_145920.json) | - By default, HRSC dataset trained with 9x schedule and image size 800\*800. ### Stronger augmentation -We also provide configs with Mixup, Mosaic and RandomRotate. Training time is less than MS. +We also provide configs with Mixup, Mosaic and RandomRotate with longer schedule. Training time is less than MS. 
DOTA: | Backbone | pretrain | schedule | Aug | mmAP | mAP50 | mAP75 | Config | Download | | :------: | :------: | :------: | :-------------: | :---: | :---: | :---: | :-------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-l | IMP | 100e | Mixup+Mosaic+RR | 54.59 | 80.16 | 61.16 | [config](./rotated_rtmdet_l-100e-aug-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/rotated_rtmdet_l-100e-aug-dota-bc59fd88.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/rotated_rtmdet_l-100e-aug-dota_20221124_224135.json) | +| RTMDet-l | IN | 100e | Mixup+Mosaic+RR | 54.59 | 80.16 | 61.16 | [config](./rotated_rtmdet_l-100e-aug-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/rotated_rtmdet_l-100e-aug-dota-bc59fd88.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/rotated_rtmdet_l-100e-aug-dota_20221124_224135.json) | **Note**: 1. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. -2. `IMP` means ImageNet pretrain, `COP` means COCO pretrain. -3. The inference speed is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and - without NMS. -4. We also provide config with mixup and mosaic for longer schedule. +2. `IN` means ImageNet pretrain, `COCO` means COCO pretrain. +3. Different from the report, the inference speed here is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and with NMS. 
diff --git a/configs/rotated_rtmdet/metafile.yml b/configs/rotated_rtmdet/metafile.yml index cb481747c..013efde78 100644 --- a/configs/rotated_rtmdet/metafile.yml +++ b/configs/rotated_rtmdet/metafile.yml @@ -24,7 +24,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 75.60 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/rotated_rtmdet_tiny-3x-dota-9d821076.pth - Name: rotated_rtmdet_tiny-3x-dota_ms In Collection: rotated_rtmdet @@ -36,7 +36,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 79.82 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota_ms/rotated_rtmdet_tiny-3x-dota_ms-f12286ff.pth - Name: rotated_rtmdet_s-3x-dota In Collection: rotated_rtmdet @@ -48,7 +48,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 76.93 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota/rotated_rtmdet_s-3x-dota-11f6ccf5.pth - Name: rotated_rtmdet_s-3x-dota_ms In Collection: rotated_rtmdet @@ -60,7 +60,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 79.98 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_s-3x-dota_ms/rotated_rtmdet_s-3x-dota_ms-20ead048.pth - Name: rotated_rtmdet_m-3x-dota In Collection: rotated_rtmdet @@ -72,7 +72,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 78.24 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota/rotated_rtmdet_m-3x-dota-beeadda6.pth - Name: rotated_rtmdet_m-3x-dota_ms In Collection: rotated_rtmdet @@ -84,7 +84,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 80.26 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_m-3x-dota_ms/rotated_rtmdet_m-3x-dota_ms-c71eb375.pth - Name: rotated_rtmdet_l-3x-dota In Collection: rotated_rtmdet @@ -96,7 +96,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 78.85 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota/rotated_rtmdet_l-3x-dota-23992372.pth - Name: rotated_rtmdet_l-3x-dota_ms In Collection: rotated_rtmdet @@ -108,7 +108,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 80.54 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-3x-dota_ms/rotated_rtmdet_l-3x-dota_ms-2738da34.pth - Name: rotated_rtmdet_l-coco_pretrain-3x-dota_ms In Collection: rotated_rtmdet @@ -120,7 +120,7 @@ Models: Dataset: DOTAv1.0 Metrics: mAP: 81.33 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-coco_pretrain-3x-dota_ms/rotated_rtmdet_l-coco_pretrain-3x-dota_ms-06d248a2.pth - Name: rotated_rtmdet_tiny-9x-hrsc In Collection: rotated_rtmdet @@ -132,4 +132,16 @@ Models: Dataset: HRSC Metrics: mAP: 90.6 - Weights: todo + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth + + - Name: rotated_rtmdet_l-100e-aug-dota + In Collection: rotated_rtmdet + Config: configs/rotated_rtmdet/ + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 80.16 + Weights: https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota/rotated_rtmdet_l-100e-aug-dota-bc59fd88.pth From 708e9cf53945f94b34266eec375cf8dc5c177870 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Fri, 16 Dec 2022 11:59:52 +0800 Subject: [PATCH 
46/52] update readme --- configs/rotated_rtmdet/README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index 97df01810..4a0d40e80 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -1,10 +1,12 @@ # RTMDet-R +> [RTMDet: An Empirical Study of Designing Real-Time Object Detectors](https://arxiv.org/abs/2212.07784) + ## Abstract -Our tech-report will be released soon. +In this paper, we aim to design an efficient real-time object detector that exceeds the YOLO series and is easily extensible for many object recognition tasks such as instance segmentation and rotated object detection. To obtain a more efficient model architecture, we explore an architecture that has compatible capacities in the backbone and neck, constructed by a basic building block that consists of large-kernel depth-wise convolutions. We further introduce soft labels when calculating matching costs in the dynamic label assignment to improve accuracy. Together with better training techniques, the resulting object detector, named RTMDet, achieves 52.8% AP on COCO with 300+ FPS on an NVIDIA 3090 GPU, outperforming the current mainstream industrial detectors. RTMDet achieves the best parameter-accuracy trade-off with tiny/small/medium/large/extra-large model sizes for various application scenarios, and obtains new state-of-the-art performance on real-time instance segmentation and rotated object detection. We hope the experimental results can provide new insights into designing versatile real-time object detectors for many object recognition tasks.
@@ -14,6 +16,10 @@ Our tech-report will be released soon. ### DOTA-v1.0 +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/object-detection-in-aerial-images-on-dota-1)](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-dota-1?p=rtmdet-an-empirical-study-of-designing-real) + +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/one-stage-anchor-free-oriented-object-1)](https://paperswithcode.com/sota/one-stage-anchor-free-oriented-object-1?p=rtmdet-an-empirical-study-of-designing-real) + | Backbone | pretrain | Aug | mmAP | mAP50 | mAP75 | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | | :---------: | :------: | :---: | :---: | :---: | :---: | :-------: | :------: | :------------------: | :------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | RTMDet-tiny | IN | RR | 47.37 | 75.36 | 50.64 | 4.88 | 20.45 | 4.40 | [config](./rotated_rtmdet_tiny-3x-dota.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/rotated_rtmdet_tiny-3x-dota-9d821076.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-3x-dota/rotated_rtmdet_tiny-3x-dota_20221201_120814.json) | @@ -30,6 +36,10 @@ Our tech-report will be released soon. ### HRSC +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/one-stage-anchor-free-oriented-object-3)](https://paperswithcode.com/sota/one-stage-anchor-free-oriented-object-3?p=rtmdet-an-empirical-study-of-designing-real) + +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/object-detection-in-aerial-images-on-hrsc2016)](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-hrsc2016?p=rtmdet-an-empirical-study-of-designing-real) + | Backbone | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | | :---------: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | RTMDet-tiny | IN | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc_20221125_145920.json) | From d9db0f9f11de7a64a4ce455bb0dacff9d848b9b2 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Fri, 16 Dec 2022 12:01:55 +0800 Subject: [PATCH 47/52] update readme --- configs/rotated_rtmdet/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/rotated_rtmdet/README.md 
b/configs/rotated_rtmdet/README.md index 4a0d40e80..e2f52f2d7 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -36,10 +36,10 @@ In this paper, we aim to design an efficient real-time object detector that exce ### HRSC -[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/one-stage-anchor-free-oriented-object-3)](https://paperswithcode.com/sota/one-stage-anchor-free-oriented-object-3?p=rtmdet-an-empirical-study-of-designing-real) - [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/object-detection-in-aerial-images-on-hrsc2016)](https://paperswithcode.com/sota/object-detection-in-aerial-images-on-hrsc2016?p=rtmdet-an-empirical-study-of-designing-real) +[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/rtmdet-an-empirical-study-of-designing-real/one-stage-anchor-free-oriented-object-3)](https://paperswithcode.com/sota/one-stage-anchor-free-oriented-object-3?p=rtmdet-an-empirical-study-of-designing-real) + | Backbone | pretrain | Aug | mAP 07 | mAP 12 | Params(M) | FLOPS(G) | Config | Download | | :---------: | :------: | :-: | :----: | :----: | :-------: | :------: | :----------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | RTMDet-tiny | IN | RR | 90.6 | 97.1 | 4.88 | 12.54 | [config](./rotated_rtmdet_tiny-9x-hrsc.py) | [model](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc-9f2e3ca6.pth) \| [log](https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/rotated_rtmdet_tiny-9x-hrsc/rotated_rtmdet_tiny-9x-hrsc_20221125_145920.json) | From 268fe57564bbdb2c81b395b3679ae96667556b3a Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Fri, 16 Dec 2022 12:10:21 +0800 Subject: [PATCH 48/52] remove useless cfg --- ..._fcos_distribution_r50_fpn_6x_hrsc_le90.py | 68 ------------------- 1 file changed, 68 deletions(-) delete mode 100644 configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py diff --git a/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py b/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py deleted file mode 100644 index e7a9d8f35..000000000 --- a/configs/rotated_fcos/rotated_fcos_distribution_r50_fpn_6x_hrsc_le90.py +++ /dev/null @@ -1,68 +0,0 @@ -_base_ = [ - '../_base_/datasets/hrsc.py', '../_base_/schedules/schedule_6x.py', - '../_base_/default_runtime.py' -] -angle_version = 'le90' - -# model settings -model = dict( - type='mmdet.FCOS', - data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', - mean=[123.675, 116.28, 103.53], - std=[58.395, 57.12, 57.375], - bgr_to_rgb=True, - pad_size_divisor=32, - boxtype2tensor=False), - backbone=dict( - type='mmdet.ResNet', - depth=50, - num_stages=4, - out_indices=(0, 1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), - neck=dict( - type='mmdet.FPN', - in_channels=[256, 512, 1024, 2048], - out_channels=256, - start_level=1, - add_extra_convs='on_output', - num_outs=5, - relu_before_extra_convs=True), - 
bbox_head=dict( - type='RotatedFCOSHead', - num_classes=1, - in_channels=256, - stacked_convs=4, - feat_channels=256, - strides=[8, 16, 32, 64, 128], - center_sampling=True, - center_sample_radius=1.5, - norm_on_bbox=True, - centerness_on_reg=True, - use_hbbox_loss=False, - scale_angle=True, - bbox_coder=dict( - type='DistanceAnglePointCoder', angle_version=angle_version), - loss_cls=dict( - type='mmdet.FocalLoss', - use_sigmoid=True, - gamma=2.0, - alpha=0.25, - loss_weight=1.0), - loss_bbox=dict(type='RotatedIoULoss', loss_weight=1.0), - angle_coder=dict(type='DistributionAngleCoder', reg_max=16), - loss_angle=dict(type='mmdet.DistributionFocalLoss', loss_weight=0.20), - loss_centerness=dict( - type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), - # training and testing settings - train_cfg=None, - test_cfg=dict( - nms_pre=2000, - min_bbox_size=0, - score_thr=0.05, - nms=dict(type='nms_rotated', iou_threshold=0.1), - max_per_img=2000)) From 97a1c2c998358df1a9e07dac63622b7f1db16817 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Fri, 16 Dec 2022 12:37:40 +0800 Subject: [PATCH 49/52] fix ut on cpu --- .../test_dense_heads/test_rotated_rtmdet_head.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py index 527133fc6..76a69270c 100644 --- a/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py +++ b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py @@ -1,7 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import unittest -import pytest import torch from mmdet.models import L1Loss from mmengine.structures import InstanceData @@ -18,11 +17,12 @@ class TestRotatedRTMDetHead(unittest.TestCase): def setUp(self): register_all_modules() - @pytest.mark.skipif( - not torch.cuda.is_available(), reason='requires CUDA support') @parameterized.expand([(RotatedRTMDetHead, ), (RotatedRTMDetSepBNHead, )]) def test_rotated_rtmdet_head_loss(self, head_cls): """Tests rotated rtmdet head loss when truth is empty and non-empty.""" + if not torch.cuda.is_available(): + return unittest.skip('test requires GPU and torch+cuda') + angle_version = 'le90' s = 256 img_metas = [{ From f1529038c45ebcdd5872ed0fe219c6ee5fc96442 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Fri, 16 Dec 2022 14:45:41 +0800 Subject: [PATCH 50/52] fix comment --- configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py | 2 +- configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py index 329e7bb36..be85e12e8 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-100e-aug-dota.py @@ -160,7 +160,7 @@ begin=0, end=1000), dict( - # use cosine lr from 150 to 300 epoch + # use cosine lr from 50 to 100 epoch type='CosineAnnealingLR', eta_min=base_lr * 0.05, begin=max_epochs // 2, diff --git a/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py b/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py index 76659ef59..1ae47c013 100644 --- a/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py +++ b/configs/rotated_rtmdet/rotated_rtmdet_l-9x-hrsc.py @@ -91,7 +91,7 @@ begin=0, end=1000), dict( - # use cosine lr from 150 to 300 epoch + # use cosine lr from 54 to 108 epoch type='CosineAnnealingLR', eta_min=base_lr * 0.05, begin=max_epochs 
// 2, From ca41ed96def93c28edb58fe551805f1f72552467 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Fri, 16 Dec 2022 16:52:50 +0800 Subject: [PATCH 51/52] add comment for change --- mmrotate/models/losses/gaussian_dist_loss.py | 1 + mmrotate/models/losses/gaussian_dist_loss_v1.py | 1 + 2 files changed, 2 insertions(+) diff --git a/mmrotate/models/losses/gaussian_dist_loss.py b/mmrotate/models/losses/gaussian_dist_loss.py index 3e64b1724..91889bb05 100644 --- a/mmrotate/models/losses/gaussian_dist_loss.py +++ b/mmrotate/models/losses/gaussian_dist_loss.py @@ -386,6 +386,7 @@ def forward(self, reduction_override if reduction_override else self.reduction) if (weight is not None) and (not torch.any(weight > 0)) and ( reduction != 'none'): + # handle different dim of weight if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 diff --git a/mmrotate/models/losses/gaussian_dist_loss_v1.py b/mmrotate/models/losses/gaussian_dist_loss_v1.py index 09f6f4a6a..4a9f10601 100644 --- a/mmrotate/models/losses/gaussian_dist_loss_v1.py +++ b/mmrotate/models/losses/gaussian_dist_loss_v1.py @@ -213,6 +213,7 @@ def forward(self, reduction_override if reduction_override else self.reduction) if (weight is not None) and (not torch.any(weight > 0)) and ( reduction != 'none'): + # handle different dim of weight if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 From 234156669b6bd716d9d7a7baea4bfd9c77bbc8d2 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Fri, 16 Dec 2022 16:53:50 +0800 Subject: [PATCH 52/52] add cite --- configs/rotated_rtmdet/README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/configs/rotated_rtmdet/README.md b/configs/rotated_rtmdet/README.md index e2f52f2d7..42a4d5bcb 100644 --- a/configs/rotated_rtmdet/README.md +++ b/configs/rotated_rtmdet/README.md @@ -61,3 +61,16 @@ DOTA: 1. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. 2. `IN` means ImageNet pretrain, `COCO` means COCO pretrain. 3. Different from the report, the inference speed here is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and with NMS. + +## Citation + +``` +@misc{lyu2022rtmdet, + title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors}, + author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen}, + year={2022}, + eprint={2212.07784}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +```
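
---

For reference, a minimal inference sketch against one of the checkpoints listed in the README tables above (e.g. `rotated_rtmdet_l-3x-dota`). This is a sketch only, assuming the mmrotate dev-1.x demo workflow — `mmrotate.utils.register_all_modules` together with `mmdet.apis.init_detector`/`inference_detector` — and the demo image path is a placeholder.

```python
# Minimal sketch, assuming the mmrotate dev-1.x demo workflow
# (mmdet.apis + mmrotate.utils.register_all_modules); the image path is a placeholder.
from mmdet.apis import inference_detector, init_detector

from mmrotate.utils import register_all_modules

# Register mmrotate datasets, models and task modules into the MMEngine registries.
register_all_modules()

config = 'configs/rotated_rtmdet/rotated_rtmdet_l-3x-dota.py'
checkpoint = ('https://download.openmmlab.com/mmrotate/v1.0/rotated_rtmdet/'
              'rotated_rtmdet_l-3x-dota/rotated_rtmdet_l-3x-dota-23992372.pth')

# Build the detector and load the released DOTA-v1.0 weights.
model = init_detector(config, checkpoint, device='cuda:0')

# Run single-image inference; the result is a DetDataSample whose
# pred_instances carry the rotated boxes, scores and labels.
result = inference_detector(model, 'demo/demo.jpg')
print(len(result.pred_instances), 'detections')
```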