# Source code for mmrotate.models.dense_heads.csl_rotated_retina_head
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmcv.runner import force_fp32
from mmdet.core import images_to_levels, multi_apply, unmap
from mmrotate.core import build_bbox_coder, multiclass_nms_rotated
from ... import obb2hbb, rotated_anchor_inside_flags
from ..builder import ROTATED_HEADS, build_loss
from .rotated_retina_head import RotatedRetinaHead
@ROTATED_HEADS.register_module()
class CSLRRetinaHead(RotatedRetinaHead):
    """Rotated RetinaNet head with an extra angle classification branch.

    On top of the classification and box regression branches inherited from
    ``RotatedRetinaHead``, this head adds a ``retina_angle_cls`` convolution
    that predicts ``coding_len`` values per anchor. The values are encoded
    and decoded by ``angle_coder`` and supervised by ``loss_angle``.

    Args:
        use_encoded_angle (bool): Decide whether to use encoded angle or
            gt angle as target. Default: True.
        shield_reg_angle (bool): Decide whether to shield the angle loss from
            reg branch. Default: False.
        angle_coder (dict): Config of angle coder.
        loss_angle (dict): Config of angle classification loss.
        init_cfg (dict or list[dict], optional): Initialization config dict.
        **kwargs: Forwarded unchanged to the parent constructor.
    """

    def __init__(self,
                 use_encoded_angle=True,
                 shield_reg_angle=False,
                 angle_coder=dict(
                     type='CSLCoder',
                     angle_version='le90',
                     omega=1,
                     window='gaussian',
                     radius=6),
                 loss_angle=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 init_cfg=dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=[
                         dict(
                             type='Normal',
                             name='retina_cls',
                             std=0.01,
                             bias_prob=0.01),
                         dict(
                             type='Normal',
                             name='retina_angle_cls',
                             std=0.01,
                             bias_prob=0.01),
                     ]),
                 **kwargs):
        # The coder (and thus ``coding_len``) is set up before the parent
        # constructor runs because ``_init_layers`` reads ``coding_len``;
        # presumably the parent constructor is what triggers
        # ``_init_layers`` -- confirm against RotatedRetinaHead.
        self.angle_coder = build_bbox_coder(angle_coder)
        self.coding_len = self.angle_coder.coding_len
        super(CSLRRetinaHead, self).__init__(**kwargs, init_cfg=init_cfg)
        self.shield_reg_angle = shield_reg_angle
        self.loss_angle = build_loss(loss_angle)
        self.use_encoded_angle = use_encoded_angle

    def _init_layers(self):
        """Initialize layers of the head.

        Builds the parent layers, then adds the angle classification
        convolution with ``num_anchors * coding_len`` output channels.
        """
        super(CSLRRetinaHead, self)._init_layers()
        self.retina_angle_cls = nn.Conv2d(
            self.feat_channels,
            self.num_anchors * self.coding_len,
            3,
            padding=1)

    def forward_single(self, x):
        """Forward feature of a single scale level.

        Args:
            x (torch.Tensor): Features of a single scale level.

        Returns:
            tuple (torch.Tensor):

                - cls_score (torch.Tensor): Cls scores for a single scale
                  level, the channels number is num_anchors * num_classes.
                - bbox_pred (torch.Tensor): Box energies / deltas for a
                  single scale level, the channels number is
                  num_anchors * 5.
                - angle_cls (torch.Tensor): Angle for a single scale level,
                  the channels number is num_anchors * coding_len.
        """
        cls_feat = x
        reg_feat = x
        for cls_conv in self.cls_convs:
            cls_feat = cls_conv(cls_feat)
        for reg_conv in self.reg_convs:
            reg_feat = reg_conv(reg_feat)
        cls_score = self.retina_cls(cls_feat)
        bbox_pred = self.retina_reg(reg_feat)
        # The angle branch shares the regression tower features.
        angle_cls = self.retina_angle_cls(reg_feat)
        return cls_score, bbox_pred, angle_cls

    def loss_single(self, cls_score, bbox_pred, angle_cls, anchors, labels,
                    label_weights, bbox_targets, bbox_weights, angle_targets,
                    angle_weights, num_total_samples):
        """Compute loss of a single scale level.

        Args:
            cls_score (torch.Tensor): Box scores for each scale level
                Has shape (N, num_anchors * num_classes, H, W).
            bbox_pred (torch.Tensor): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W).
            angle_cls (torch.Tensor): Angle classification outputs for each
                scale level with shape (N, num_anchors * coding_len, H, W).
            anchors (torch.Tensor): Box reference for each scale level with
                shape (N, num_total_anchors, 5).
            labels (torch.Tensor): Labels of each anchors with shape
                (N, num_total_anchors).
            label_weights (torch.Tensor): Label weights of each anchor with
                shape (N, num_total_anchors)
            bbox_targets (torch.Tensor): BBox regression targets of each
                anchor with shape (N, num_total_anchors, 5).
            bbox_weights (torch.Tensor): BBox regression loss weights of each
                anchor with shape (N, num_total_anchors, 5).
            angle_targets (torch.Tensor): Angle classification targets of
                each anchor with shape (N, num_total_anchors, coding_len).
            angle_weights (torch.Tensor): Angle classification loss weights
                of each anchor with shape (N, num_total_anchors, 1).
            num_total_samples (int): If sampling, num total samples equal to
                the number of total anchors; Otherwise, it is the number of
                positive anchors.

        Returns:
            tuple (torch.Tensor):

                - loss_cls (torch.Tensor): cls. loss for each scale level.
                - loss_bbox (torch.Tensor): reg. loss for each scale level.
                - loss_angle (torch.Tensor): angle cls. loss for each scale
                  level.
        """
        # Classification loss
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        loss_cls = self.loss_cls(
            cls_score, labels, label_weights, avg_factor=num_total_samples)

        # Regression loss
        bbox_targets = bbox_targets.reshape(-1, 5)
        bbox_weights = bbox_weights.reshape(-1, 5)
        # Shield angle in reg. branch: zero the weight of the last (angle)
        # column so only the classification branch supervises the angle.
        if self.shield_reg_angle:
            bbox_weights[:, -1] = 0.
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5)
        if self.reg_decoded_bbox:
            anchors = anchors.reshape(-1, 5)
            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)
        loss_bbox = self.loss_bbox(
            bbox_pred,
            bbox_targets,
            bbox_weights,
            avg_factor=num_total_samples)

        # Angle classification loss
        angle_cls = angle_cls.permute(0, 2, 3, 1).reshape(-1, self.coding_len)
        angle_targets = angle_targets.reshape(-1, self.coding_len)
        angle_weights = angle_weights.reshape(-1, 1)
        loss_angle = self.loss_angle(
            angle_cls,
            angle_targets,
            weight=angle_weights,
            avg_factor=num_total_samples)
        return loss_cls, loss_bbox, loss_angle

    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses'))
    def loss(self,
             cls_scores,
             bbox_preds,
             angle_clses,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W)
            angle_clses (list[Tensor]): Box angles for each scale
                level with shape (N, num_anchors * coding_len, H, W)
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 5) in [cx, cy, w, h, a] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss. Default: None

        Returns:
            dict[str, Tensor] | None: A dictionary of loss components with
            keys ``loss_cls``, ``loss_bbox`` and ``loss_angle``, or None
            when ``get_targets`` returns None.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels

        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
         num_total_pos, num_total_neg, angle_targets_list,
         angle_weights_list) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)

        # Anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # Concat all level anchors of each image to a single tensor, then
        # regroup them per level.
        concat_anchor_list = [
            torch.cat(img_anchors) for img_anchors in anchor_list
        ]
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        losses_cls, losses_bbox, losses_angle = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            angle_clses,
            all_anchor_list,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            angle_targets_list,
            angle_weights_list,
            num_total_samples=num_total_samples)
        return dict(
            loss_cls=losses_cls,
            loss_bbox=losses_bbox,
            loss_angle=losses_angle)

    def _get_targets_single(self,
                            flat_anchors,
                            valid_flags,
                            gt_bboxes,
                            gt_bboxes_ignore,
                            gt_labels,
                            img_meta,
                            label_channels=1,
                            unmap_outputs=True):
        """Compute regression, classification and angle targets for anchors
        in a single image.

        Args:
            flat_anchors (torch.Tensor): Multi-level anchors of the image,
                which are concatenated into a single tensor of shape
                (num_anchors, 5)
            valid_flags (torch.Tensor): Multi level valid flags of the image,
                which are concatenated into a single tensor of
                shape (num_anchors,).
            gt_bboxes (torch.Tensor): Ground truth bboxes of the image,
                shape (num_gts, 5).
            gt_bboxes_ignore (torch.Tensor): Ground truth bboxes to be
                ignored, shape (num_ignored_gts, 5).
            gt_labels (torch.Tensor): Ground truth labels of each box,
                shape (num_gts,).
            img_meta (dict): Meta info of the image.
            label_channels (int): Channel of label. Default: 1.
            unmap_outputs (bool): Whether to map outputs back to the original
                set of anchors. Default: True.

        Returns:
            tuple: A 9-tuple for this image. Every entry is None when no
            anchor lies inside the image. Otherwise:

                - labels (Tensor): Label of each anchor; anchors that are
                  not positive keep the background label ``num_classes``.
                - label_weights (Tensor): Label weight of each anchor.
                - bbox_targets (Tensor): BBox target of each anchor.
                - bbox_weights (Tensor): BBox weight of each anchor.
                - pos_inds (Tensor): Indices of positive anchors.
                - neg_inds (Tensor): Indices of negative anchors.
                - sampling_result: Result object returned by the sampler.
                - angle_targets (Tensor): Output of ``angle_coder.encode``
                  for each anchor, produced even when the image has no
                  positive anchor.
                - angle_weights (Tensor): Angle weight of each anchor,
                  1 for positives and 0 otherwise.
        """
        inside_flags = rotated_anchor_inside_flags(
            flat_anchors, valid_flags, img_meta['img_shape'][:2],
            self.train_cfg.allowed_border)
        if not inside_flags.any():
            return (None, ) * 9
        # Assign gt and sample anchors
        anchors = flat_anchors[inside_flags, :]

        if self.assign_by_circumhbbox is not None:
            gt_bboxes_assign = obb2hbb(gt_bboxes, self.assign_by_circumhbbox)
            assign_result = self.assigner.assign(
                anchors, gt_bboxes_assign, gt_bboxes_ignore,
                None if self.sampling else gt_labels)
        else:
            assign_result = self.assigner.assign(
                anchors, gt_bboxes, gt_bboxes_ignore,
                None if self.sampling else gt_labels)

        sampling_result = self.sampler.sample(assign_result, anchors,
                                              gt_bboxes)

        num_valid_anchors = anchors.shape[0]
        bbox_targets = torch.zeros_like(anchors)
        bbox_weights = torch.zeros_like(anchors)
        # One raw angle value per anchor; encoded further below.
        angle_targets = torch.zeros_like(bbox_targets[:, 4:5])
        angle_weights = torch.zeros_like(bbox_targets[:, 4:5])
        labels = anchors.new_full((num_valid_anchors, ),
                                  self.num_classes,
                                  dtype=torch.long)
        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)

        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        if len(pos_inds) > 0:
            if not self.reg_decoded_bbox:
                pos_bbox_targets = self.bbox_coder.encode(
                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
            else:
                pos_bbox_targets = sampling_result.pos_gt_bboxes
            bbox_targets[pos_inds, :] = pos_bbox_targets
            bbox_weights[pos_inds, :] = 1.0

            if self.use_encoded_angle:
                # Get encoded angle as target
                angle_targets[pos_inds, :] = pos_bbox_targets[:, 4:5]
            else:
                # Get gt angle as target
                angle_targets[pos_inds, :] = \
                    sampling_result.pos_gt_bboxes[:, 4:5]
            angle_weights[pos_inds, :] = 1.0

            if gt_labels is None:
                # Only rpn gives gt_labels as None
                # Foreground is the first class since v2.5.0
                labels[pos_inds] = 0
            else:
                labels[pos_inds] = gt_labels[
                    sampling_result.pos_assigned_gt_inds]
            if self.train_cfg.pos_weight <= 0:
                label_weights[pos_inds] = 1.0
            else:
                label_weights[pos_inds] = self.train_cfg.pos_weight
        if len(neg_inds) > 0:
            label_weights[neg_inds] = 1.0

        # Angle encoder. This runs unconditionally so the angle targets have
        # the same layout whether or not the image has positive anchors;
        # otherwise an image without positives would yield raw (N, 1)
        # targets that cannot be combined with the encoded targets of other
        # images, nor reshaped to (-1, coding_len) in ``loss_single``.
        # Non-positive rows keep zero weight, so they add nothing to the
        # loss.
        angle_targets = self.angle_coder.encode(angle_targets)

        # Map up to original set of anchors
        if unmap_outputs:
            num_total_anchors = flat_anchors.size(0)
            labels = unmap(
                labels, num_total_anchors, inside_flags,
                fill=self.num_classes)  # fill bg label
            label_weights = unmap(label_weights, num_total_anchors,
                                  inside_flags)
            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
            angle_targets = unmap(angle_targets, num_total_anchors,
                                  inside_flags)
            angle_weights = unmap(angle_weights, num_total_anchors,
                                  inside_flags)

        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
                neg_inds, sampling_result, angle_targets, angle_weights)

    def _get_bboxes_single(self,
                           cls_score_list,
                           bbox_pred_list,
                           angle_cls_list,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_score_list (list[Tensor]): Box scores for a single scale level
                Has shape (num_anchors * num_classes, H, W).
            bbox_pred_list (list[Tensor]): Box energies / deltas for a single
                scale level with shape (num_anchors * 5, H, W).
            angle_cls_list (list[Tensor]): Angle deltas for a single
                scale level with shape (num_anchors * coding_len, H, W).
            mlvl_anchors (list[Tensor]): Box reference for a single scale level
                with shape (num_total_anchors, 5).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arange as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            tuple[Tensor, Tensor]: If ``with_nms`` is True, the
            ``(det_bboxes, det_labels)`` pair returned by
            ``multiclass_nms_rotated``. Otherwise the un-suppressed
            ``(mlvl_bboxes, mlvl_scores)`` of all levels, where boxes are
            (cx, cy, w, h, a) and, with sigmoid classification, the scores
            carry one extra all-zero background column.
        """
        cfg = self.test_cfg if cfg is None else cfg
        # ``zip`` below would silently truncate to the shortest input, so
        # require every per-level list to have the same length.
        assert (len(cls_score_list) == len(bbox_pred_list) ==
                len(angle_cls_list) == len(mlvl_anchors))
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, angle_cls, anchors in zip(
                cls_score_list, bbox_pred_list, angle_cls_list, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
            angle_cls = angle_cls.permute(1, 2, 0).reshape(
                -1, self.coding_len).sigmoid()

            nms_pre = cfg.get('nms_pre', -1)
            if scores.shape[0] > nms_pre > 0:
                # Get maximum scores for foreground classes.
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # Remind that we set FG labels to [0, num_class-1]
                    # since mmdet v2.0
                    # BG cat_id: num_class
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                angle_cls = angle_cls[topk_inds, :]

            # Angle decoder
            angle_pred = self.angle_coder.decode(angle_cls)

            if self.use_encoded_angle:
                # The classified angle is an encoded delta: substitute it
                # for the regressed one before decoding the box.
                bbox_pred[..., -1] = angle_pred
                bboxes = self.bbox_coder.decode(
                    anchors, bbox_pred, max_shape=img_shape)
            else:
                # The classified angle is already a box angle: overwrite
                # the angle of the decoded box.
                bboxes = self.bbox_coder.decode(
                    anchors, bbox_pred, max_shape=img_shape)
                bboxes[..., -1] = angle_pred

            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            # Angle should not be rescaled
            mlvl_bboxes[:, :4] = mlvl_bboxes[:, :4] / mlvl_bboxes.new_tensor(
                scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        if self.use_sigmoid_cls:
            # Add a dummy background class to the backend when using sigmoid
            # Remind that we set FG labels to [0, num_class-1] since mmdet v2.0
            # BG cat_id: num_class
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        if with_nms:
            det_bboxes, det_labels = multiclass_nms_rotated(
                mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms,
                cfg.max_per_img)
            return det_bboxes, det_labels
        else:
            return mlvl_bboxes, mlvl_scores

    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses'))
    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   angle_clses,
                   img_metas,
                   cfg=None,
                   rescale=False,
                   with_nms=True):
        """Transform network output for a batch into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W)
            angle_clses (list[Tensor]): Box angles for each scale
                level with shape (N, num_anchors * coding_len, H, W)
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
                The first item is an (n, 6) tensor, where the first 5 columns
                are bounding box positions (cx, cy, w, h, a) and the
                6-th column is a score between 0 and 1. The second item is a
                (n,) tensor where each item is the predicted class label of the
                corresponding box.
        """
        # All three branches must provide one tensor per feature level.
        assert len(cls_scores) == len(bbox_preds) == len(angle_clses)
        num_levels = len(cls_scores)

        device = cls_scores[0].device
        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
        mlvl_anchors = self.anchor_generator.grid_priors(
            featmap_sizes, device=device)

        result_list = []
        for img_id, _ in enumerate(img_metas):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id].detach() for i in range(num_levels)
            ]
            angle_cls_list = [
                angle_clses[i][img_id].detach() for i in range(num_levels)
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
            if with_nms:
                # Some heads don't support with_nms argument, so it is only
                # passed explicitly when it differs from the default.
                proposals = self._get_bboxes_single(cls_score_list,
                                                    bbox_pred_list,
                                                    angle_cls_list,
                                                    mlvl_anchors, img_shape,
                                                    scale_factor, cfg, rescale)
            else:
                proposals = self._get_bboxes_single(cls_score_list,
                                                    bbox_pred_list,
                                                    angle_cls_list,
                                                    mlvl_anchors, img_shape,
                                                    scale_factor, cfg, rescale,
                                                    with_nms)
            result_list.append(proposals)
        return result_list