
Source code for mmrotate.models.dense_heads.csl_rotated_retina_head

# Copyright (c) OpenMMLab. All rights reserved.

import torch
import torch.nn as nn
from mmcv.runner import force_fp32
from mmdet.core import images_to_levels, multi_apply, unmap

from mmrotate.core import (build_bbox_coder, multiclass_nms_rotated, obb2hbb,
                           rotated_anchor_inside_flags)
from ..builder import ROTATED_HEADS, build_loss
from .rotated_retina_head import RotatedRetinaHead


@ROTATED_HEADS.register_module()
class CSLRRetinaHead(RotatedRetinaHead):
    """Rotated anchor-based retina head with CSL angle classification.

    Args:
        use_encoded_angle (bool): Decide whether to use encoded angle or
            gt angle as target. Default: True.
        shield_reg_angle (bool): Decide whether to shield the angle loss from
            the reg branch. Default: False.
        angle_coder (dict): Config of angle coder.
        loss_angle (dict): Config of angle classification loss.
        init_cfg (dict or list[dict], optional): Initialization config dict.
    """  # noqa: W605

    def __init__(self,
                 use_encoded_angle=True,
                 shield_reg_angle=False,
                 angle_coder=dict(
                     type='CSLCoder',
                     angle_version='le90',
                     omega=1,
                     window='gaussian',
                     radius=6),
                 loss_angle=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 init_cfg=dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=[
                         dict(
                             type='Normal',
                             name='retina_cls',
                             std=0.01,
                             bias_prob=0.01),
                         dict(
                             type='Normal',
                             name='retina_angle_cls',
                             std=0.01,
                             bias_prob=0.01),
                     ]),
                 **kwargs):
        self.angle_coder = build_bbox_coder(angle_coder)
        self.coding_len = self.angle_coder.coding_len
        super(CSLRRetinaHead, self).__init__(**kwargs, init_cfg=init_cfg)
        self.shield_reg_angle = shield_reg_angle
        self.loss_angle = build_loss(loss_angle)
        self.use_encoded_angle = use_encoded_angle

    def _init_layers(self):
        """Initialize layers of the head."""
        super(CSLRRetinaHead, self)._init_layers()
        self.retina_angle_cls = nn.Conv2d(
            self.feat_channels,
            self.num_anchors * self.coding_len,
            3,
            padding=1)
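The angle coder configured above turns angle regression into classification over coding_len bins with smoothed circular targets. As a rough, standalone sketch of what a Circular Smooth Label (CSL) encoder might produce: the le90 range, omega=1 (degree per bin, so coding_len=180), and a Gaussian window of radius 6 come from the default config above, but the encoding details below are an assumption for illustration; the real implementation is mmrotate's CSLCoder.

# Standalone CSL encoding sketch (assumptions noted above, not this file's code).
import math
import torch

def csl_encode(angle, coding_len=180, radius=6):
    """Encode one angle (radians, le90) into a smoothed circular label."""
    # Map [-pi/2, pi/2) onto bin indices [0, coding_len)
    center = (angle + math.pi / 2) / math.pi * coding_len
    bins = torch.arange(coding_len, dtype=torch.float32)
    # Circular distance from each bin to the target bin
    diff = (bins - center).abs()
    dist = torch.minimum(diff, coding_len - diff)
    # Gaussian window: neighbouring bins get soft non-zero targets, so a
    # near-miss angle prediction is penalised less than a distant one
    return torch.exp(-dist.pow(2) / (2 * radius ** 2))

smooth_label = csl_encode(torch.tensor(0.3))
print(smooth_label.argmax())  # bin closest to the encoded angle (107)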
    def forward_single(self, x):
        """Forward feature of a single scale level.

        Args:
            x (torch.Tensor): Features of a single scale level.

        Returns:
            tuple (torch.Tensor):

                - cls_score (torch.Tensor): Cls scores for a single scale
                  level, the number of channels is num_anchors * num_classes.
                - bbox_pred (torch.Tensor): Box energies / deltas for a
                  single scale level, the number of channels is
                  num_anchors * 5.
                - angle_cls (torch.Tensor): Angle scores for a single scale
                  level, the number of channels is num_anchors * coding_len.
        """
        cls_feat = x
        reg_feat = x
        for cls_conv in self.cls_convs:
            cls_feat = cls_conv(cls_feat)
        for reg_conv in self.reg_convs:
            reg_feat = reg_conv(reg_feat)
        cls_score = self.retina_cls(cls_feat)
        bbox_pred = self.retina_reg(reg_feat)
        angle_cls = self.retina_angle_cls(reg_feat)
        return cls_score, bbox_pred, angle_cls
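To see the extra angle branch's output shape in isolation, here is a minimal sketch reproducing just the retina_angle_cls conv from _init_layers; feat_channels=256, num_anchors=9 and the 32x32 feature size are illustrative assumptions, not values from this file.

# Shape check for the angle-classification branch (illustrative sizes).
import torch
import torch.nn as nn

num_anchors, coding_len, feat_channels = 9, 180, 256
retina_angle_cls = nn.Conv2d(
    feat_channels, num_anchors * coding_len, 3, padding=1)
x = torch.rand(1, feat_channels, 32, 32)          # one FPN level
out = retina_angle_cls(x)
assert out.shape == (1, num_anchors * coding_len, 32, 32)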
    def loss_single(self, cls_score, bbox_pred, angle_cls, anchors, labels,
                    label_weights, bbox_targets, bbox_weights, angle_targets,
                    angle_weights, num_total_samples):
        """Compute loss of a single scale level.

        Args:
            cls_score (torch.Tensor): Box scores for each scale level
                with shape (N, num_anchors * num_classes, H, W).
            bbox_pred (torch.Tensor): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W).
            angle_cls (torch.Tensor): Angle scores for each scale level
                with shape (N, num_anchors * coding_len, H, W).
            anchors (torch.Tensor): Box reference for each scale level with
                shape (N, num_total_anchors, 5).
            labels (torch.Tensor): Labels of each anchor with shape
                (N, num_total_anchors).
            label_weights (torch.Tensor): Label weights of each anchor with
                shape (N, num_total_anchors).
            bbox_targets (torch.Tensor): BBox regression targets of each
                anchor with shape (N, num_total_anchors, 5).
            bbox_weights (torch.Tensor): BBox regression loss weights of
                each anchor with shape (N, num_total_anchors, 5).
            angle_targets (torch.Tensor): Angle classification targets of
                each anchor with shape (N, num_total_anchors, coding_len).
            angle_weights (torch.Tensor): Angle classification loss weights
                of each anchor with shape (N, num_total_anchors, 1).
            num_total_samples (int): If sampling, the number of total
                anchors; otherwise, the number of positive anchors.

        Returns:
            tuple (torch.Tensor):

                - loss_cls (torch.Tensor): cls. loss for each scale level.
                - loss_bbox (torch.Tensor): reg. loss for each scale level.
                - loss_angle (torch.Tensor): angle cls. loss for each scale
                  level.
        """
        # Classification loss
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        loss_cls = self.loss_cls(
            cls_score, labels, label_weights, avg_factor=num_total_samples)
        # Regression loss
        bbox_targets = bbox_targets.reshape(-1, 5)
        bbox_weights = bbox_weights.reshape(-1, 5)
        # Shield angle in reg. branch
        if self.shield_reg_angle:
            bbox_weights[:, -1] = 0.
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5)
        if self.reg_decoded_bbox:
            anchors = anchors.reshape(-1, 5)
            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)
        loss_bbox = self.loss_bbox(
            bbox_pred,
            bbox_targets,
            bbox_weights,
            avg_factor=num_total_samples)
        # Angle classification loss
        angle_cls = angle_cls.permute(0, 2, 3,
                                      1).reshape(-1, self.coding_len)
        angle_targets = angle_targets.reshape(-1, self.coding_len)
        angle_weights = angle_weights.reshape(-1, 1)
        loss_angle = self.loss_angle(
            angle_cls,
            angle_targets,
            weight=angle_weights,
            avg_factor=num_total_samples)
        return loss_cls, loss_bbox, loss_angle
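The permute/reshape pattern above flattens a per-level map so that each row is one anchor's prediction vector. A tiny self-contained check of that reshuffle, with illustrative sizes:

# (N, A*C, H, W) -> (N*H*W*A, C): one row per anchor, C scores per row.
import torch

N, A, C, H, W = 2, 9, 180, 8, 8
angle_cls = torch.rand(N, A * C, H, W)
flat = angle_cls.permute(0, 2, 3, 1).reshape(-1, C)
assert flat.shape == (N * H * W * A, C)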
    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses'))
    def loss(self,
             cls_scores,
             bbox_preds,
             angle_clses,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W).
            angle_clses (list[Tensor]): Box angles for each scale
                level with shape (N, num_anchors * coding_len, H, W).
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image
                with shape (num_gts, 5) in [cx, cy, w, h, a] format.
            gt_labels (list[Tensor]): Class indices corresponding to
                each box.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss. Default: None.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels
        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, num_total_pos, num_total_neg, angle_target_list,
         angle_weight_list) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)

        # Anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # Concat all level anchors and flags to a single tensor
        concat_anchor_list = []
        for i, _ in enumerate(anchor_list):
            concat_anchor_list.append(torch.cat(anchor_list[i]))
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        losses_cls, losses_bbox, losses_angle = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            angle_clses,
            all_anchor_list,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            angle_target_list,
            angle_weight_list,
            num_total_samples=num_total_samples)
        return dict(
            loss_cls=losses_cls,
            loss_bbox=losses_bbox,
            loss_angle=losses_angle)
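images_to_levels, used above, regroups per-image anchor tensors into per-level tensors so that multi_apply can iterate over FPN levels. A hand-rolled equivalent of that reshuffle, with illustrative sizes (the real helper lives in mmdet.core):

# Per-image (num_total_anchors, 5) tensors -> per-level (num_imgs, n, 5).
import torch

num_level_anchors = [12, 3]                        # anchors per FPN level
per_image = [torch.rand(15, 5) for _ in range(2)]  # 2 images, 15 anchors each
stacked = torch.stack(per_image, 0)                # (num_imgs, 15, 5)
start, per_level = 0, []
for n in num_level_anchors:
    per_level.append(stacked[:, start:start + n])  # (num_imgs, n, 5)
    start += n
assert [t.shape[1] for t in per_level] == num_level_anchors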
    def _get_targets_single(self,
                            flat_anchors,
                            valid_flags,
                            gt_bboxes,
                            gt_bboxes_ignore,
                            gt_labels,
                            img_meta,
                            label_channels=1,
                            unmap_outputs=True):
        """Compute regression and classification targets for anchors in a
        single image.

        Args:
            flat_anchors (torch.Tensor): Multi-level anchors of the image,
                which are concatenated into a single tensor of shape
                (num_anchors, 5).
            valid_flags (torch.Tensor): Multi level valid flags of the image,
                which are concatenated into a single tensor of shape
                (num_anchors,).
            gt_bboxes (torch.Tensor): Ground truth bboxes of the image,
                shape (num_gts, 5).
            gt_bboxes_ignore (torch.Tensor): Ground truth bboxes to be
                ignored, shape (num_ignored_gts, 5).
            gt_labels (torch.Tensor): Ground truth labels of each box,
                shape (num_gts,).
            img_meta (dict): Meta info of the image.
            label_channels (int): Channel of label. Default: 1.
            unmap_outputs (bool): Whether to map outputs back to the original
                set of anchors. Default: True.

        Returns:
            tuple:

                - labels (Tensor): Labels of each anchor.
                - label_weights (Tensor): Label weights of each anchor.
                - bbox_targets (Tensor): BBox targets of each anchor.
                - bbox_weights (Tensor): BBox weights of each anchor.
                - pos_inds (Tensor): Indices of positive anchors.
                - neg_inds (Tensor): Indices of negative anchors.
                - sampling_result (obj): Sampling result of the image.
                - angle_targets (Tensor): Angle targets of each anchor.
                - angle_weights (Tensor): Angle weights of each anchor.
        """
        inside_flags = rotated_anchor_inside_flags(
            flat_anchors, valid_flags, img_meta['img_shape'][:2],
            self.train_cfg.allowed_border)
        if not inside_flags.any():
            return (None, ) * 9
        # Assign gt and sample anchors
        anchors = flat_anchors[inside_flags, :]

        if self.assign_by_circumhbbox is not None:
            gt_bboxes_assign = obb2hbb(gt_bboxes, self.assign_by_circumhbbox)
            assign_result = self.assigner.assign(
                anchors, gt_bboxes_assign, gt_bboxes_ignore,
                None if self.sampling else gt_labels)
        else:
            assign_result = self.assigner.assign(
                anchors, gt_bboxes, gt_bboxes_ignore,
                None if self.sampling else gt_labels)

        sampling_result = self.sampler.sample(assign_result, anchors,
                                              gt_bboxes)

        num_valid_anchors = anchors.shape[0]
        bbox_targets = torch.zeros_like(anchors)
        bbox_weights = torch.zeros_like(anchors)
        angle_targets = torch.zeros_like(bbox_targets[:, 4:5])
        angle_weights = torch.zeros_like(bbox_targets[:, 4:5])
        labels = anchors.new_full((num_valid_anchors, ),
                                  self.num_classes,
                                  dtype=torch.long)
        label_weights = anchors.new_zeros(
            num_valid_anchors, dtype=torch.float)

        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        if len(pos_inds) > 0:
            if not self.reg_decoded_bbox:
                pos_bbox_targets = self.bbox_coder.encode(
                    sampling_result.pos_bboxes,
                    sampling_result.pos_gt_bboxes)
            else:
                pos_bbox_targets = sampling_result.pos_gt_bboxes
            bbox_targets[pos_inds, :] = pos_bbox_targets
            bbox_weights[pos_inds, :] = 1.0

            if self.use_encoded_angle:
                # Get encoded angle as target
                angle_targets[pos_inds, :] = pos_bbox_targets[:, 4:5]
            else:
                # Get gt angle as target
                angle_targets[pos_inds, :] = \
                    sampling_result.pos_gt_bboxes[:, 4:5]
            # Angle encoder
            angle_targets = self.angle_coder.encode(angle_targets)
            angle_weights[pos_inds, :] = 1.0

            if gt_labels is None:
                # Only rpn gives gt_labels as None
                # Foreground is the first class since v2.5.0
                labels[pos_inds] = 0
            else:
                labels[pos_inds] = gt_labels[
                    sampling_result.pos_assigned_gt_inds]
            if self.train_cfg.pos_weight <= 0:
                label_weights[pos_inds] = 1.0
            else:
                label_weights[pos_inds] = self.train_cfg.pos_weight
        if len(neg_inds) > 0:
            label_weights[neg_inds] = 1.0

        # Map up to original set of anchors
        if unmap_outputs:
            num_total_anchors = flat_anchors.size(0)
            labels = unmap(
                labels, num_total_anchors, inside_flags,
                fill=self.num_classes)  # fill bg label
            label_weights = unmap(label_weights, num_total_anchors,
                                  inside_flags)
            bbox_targets = unmap(bbox_targets, num_total_anchors,
                                 inside_flags)
            bbox_weights = unmap(bbox_weights, num_total_anchors,
                                 inside_flags)
            angle_targets = unmap(angle_targets, num_total_anchors,
                                  inside_flags)
            angle_weights = unmap(angle_weights, num_total_anchors,
                                  inside_flags)

        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
                neg_inds, sampling_result, angle_targets, angle_weights)

    def _get_bboxes_single(self,
                           cls_score_list,
                           bbox_pred_list,
                           angle_cls_list,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_score_list (list[Tensor]): Box scores for a single scale
                level with shape (num_anchors * num_classes, H, W).
            bbox_pred_list (list[Tensor]): Box energies / deltas for a
                single scale level with shape (num_anchors * 5, H, W).
            angle_cls_list (list[Tensor]): Angle scores for a single scale
                level with shape (num_anchors * coding_len, H, W).
            mlvl_anchors (list[Tensor]): Box reference for a single scale
                level with shape (num_total_anchors, 5).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arranged as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before returning boxes.
                Default: True.

        Returns:
            tuple (Tensor, Tensor): Labeled boxes in shape (n, 6), where the
                first 5 columns are bounding box positions (cx, cy, w, h, a)
                and the 6-th column is a score between 0 and 1, and the
                corresponding class labels in shape (n,).
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, angle_cls, anchors in zip(
                cls_score_list, bbox_pred_list, angle_cls_list, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            cls_score = cls_score.permute(
                1, 2, 0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
            angle_cls = angle_cls.permute(1, 2, 0).reshape(
                -1, self.coding_len).sigmoid()

            nms_pre = cfg.get('nms_pre', -1)
            if scores.shape[0] > nms_pre > 0:
                # Get maximum scores for foreground classes.
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # Remind that we set FG labels to [0, num_class-1]
                    # since mmdet v2.0
                    # BG cat_id: num_class
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                angle_cls = angle_cls[topk_inds, :]

            # Angle decoder
            angle_pred = self.angle_coder.decode(angle_cls)
            if self.use_encoded_angle:
                bbox_pred[..., -1] = angle_pred
                bboxes = self.bbox_coder.decode(
                    anchors, bbox_pred, max_shape=img_shape)
            else:
                bboxes = self.bbox_coder.decode(
                    anchors, bbox_pred, max_shape=img_shape)
                bboxes[..., -1] = angle_pred

            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            # Angle should not be rescaled
            mlvl_bboxes[:, :4] = mlvl_bboxes[:, :4] / mlvl_bboxes.new_tensor(
                scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        if self.use_sigmoid_cls:
            # Add a dummy background class to the backend when using sigmoid
            # Remind that we set FG labels to [0, num_class-1]
            # since mmdet v2.0
            # BG cat_id: num_class
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        if with_nms:
            det_bboxes, det_labels = multiclass_nms_rotated(
                mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms,
                cfg.max_per_img)
            return det_bboxes, det_labels
        else:
            return mlvl_bboxes, mlvl_scores
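The angle decoder called above (self.angle_coder.decode) is the inverse of the encoding sketched earlier. A plausible decode picks the highest-scoring bin and maps it back to an angle; the same le90/omega=1/coding_len=180 assumptions apply, and the real implementation is mmrotate's CSLCoder, which may differ in detail.

# Standalone CSL decoding sketch (assumptions noted above).
import math
import torch

def csl_decode(angle_cls, coding_len=180):
    """Map per-bin sigmoid scores (..., coding_len) back to radians."""
    best_bin = angle_cls.argmax(dim=-1).float()
    return best_bin / coding_len * math.pi - math.pi / 2

scores = torch.zeros(4, 180)
scores[:, 107] = 1.0          # peak at bin 107
print(csl_decode(scores))     # ~0.30 rad for every row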
    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses'))
    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   angle_clses,
                   img_metas,
                   cfg=None,
                   rescale=False,
                   with_nms=True):
        """Transform network output for a batch into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W).
            angle_clses (list[Tensor]): Box angles for each scale
                level with shape (N, num_anchors * coding_len, H, W).
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before returning boxes.
                Default: True.

        Returns:
            list[tuple[Tensor, Tensor]]: Each item in result_list is a
                2-tuple. The first item is an (n, 6) tensor, where the first
                5 columns are bounding box positions (cx, cy, w, h, a) and
                the 6-th column is a score between 0 and 1. The second item
                is a (n,) tensor where each item is the predicted class
                label of the corresponding box.

        Example:
            >>> import mmcv
            >>> self = AnchorHead(
            >>>     num_classes=9,
            >>>     in_channels=1,
            >>>     anchor_generator=dict(
            >>>         type='AnchorGenerator',
            >>>         scales=[8],
            >>>         ratios=[0.5, 1.0, 2.0],
            >>>         strides=[4,]))
            >>> img_metas = [{'img_shape': (32, 32, 3), 'scale_factor': 1}]
            >>> cfg = mmcv.Config(dict(
            >>>     score_thr=0.00,
            >>>     nms=dict(type='nms', iou_thr=1.0),
            >>>     max_per_img=10))
            >>> feat = torch.rand(1, 1, 3, 3)
            >>> cls_score, bbox_pred = self.forward_single(feat)
            >>> # Note the input lists are over different levels, not images
            >>> cls_scores, bbox_preds = [cls_score], [bbox_pred]
            >>> result_list = self.get_bboxes(cls_scores, bbox_preds,
            >>>                               img_metas, cfg)
            >>> det_bboxes, det_labels = result_list[0]
            >>> assert len(result_list) == 1
            >>> assert det_bboxes.shape[1] == 5
            >>> assert len(det_bboxes) == len(det_labels) == cfg.max_per_img
        """
        assert len(cls_scores) == len(bbox_preds)
        num_levels = len(cls_scores)
        device = cls_scores[0].device
        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
        mlvl_anchors = self.anchor_generator.grid_priors(
            featmap_sizes, device=device)

        result_list = []
        for img_id, _ in enumerate(img_metas):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id].detach() for i in range(num_levels)
            ]
            angle_cls_list = [
                angle_clses[i][img_id].detach() for i in range(num_levels)
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
            if with_nms:
                # Some heads don't support with_nms argument
                proposals = self._get_bboxes_single(cls_score_list,
                                                    bbox_pred_list,
                                                    angle_cls_list,
                                                    mlvl_anchors, img_shape,
                                                    scale_factor, cfg,
                                                    rescale)
            else:
                proposals = self._get_bboxes_single(cls_score_list,
                                                    bbox_pred_list,
                                                    angle_cls_list,
                                                    mlvl_anchors, img_shape,
                                                    scale_factor, cfg,
                                                    rescale, with_nms)
            result_list.append(proposals)
        return result_list
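For completeness, a hedged sketch of how this head is typically wired into a rotated RetinaNet config. All field values below are illustrative assumptions, not taken from this file; see the csl configs shipped with mmrotate for the exact settings.

# Illustrative bbox_head config fragment for CSLRRetinaHead.
bbox_head = dict(
    type='CSLRRetinaHead',
    num_classes=15,           # e.g. DOTA-v1.0 has 15 classes
    in_channels=256,
    stacked_convs=4,
    feat_channels=256,
    use_encoded_angle=False,
    shield_reg_angle=False,
    angle_coder=dict(
        type='CSLCoder',
        angle_version='le90',
        omega=1,
        window='gaussian',
        radius=6),
    loss_angle=dict(
        type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))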