
Source code for mmrotate.models.dense_heads.csl_rotated_retina_head

# Copyright (c) OpenMMLab. All rights reserved.

import torch
import torch.nn as nn
from mmcv.runner import force_fp32
from mmdet.core import images_to_levels, multi_apply, unmap

from mmrotate.core import build_bbox_coder, multiclass_nms_rotated
from mmrotate.core import obb2hbb, rotated_anchor_inside_flags
from ..builder import ROTATED_HEADS, build_loss
from .rotated_retina_head import RotatedRetinaHead


@ROTATED_HEADS.register_module()
class CSLRRetinaHead(RotatedRetinaHead):
    """Rotational Anchor-based refine head.

    Args:
        use_encoded_angle (bool): Decide whether to use encoded angle or
            gt angle as target. Default: True.
        shield_reg_angle (bool): Decide whether to shield the angle loss from
            reg branch. Default: False.
        angle_coder (dict): Config of angle coder.
        loss_angle (dict): Config of angle classification loss.
        init_cfg (dict or list[dict], optional): Initialization config dict.
    """  # noqa: W605

    def __init__(self,
                 use_encoded_angle=True,
                 shield_reg_angle=False,
                 angle_coder=dict(
                     type='CSLCoder',
                     angle_version='le90',
                     omega=1,
                     window='gaussian',
                     radius=6),
                 loss_angle=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 init_cfg=dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=[
                         dict(
                             type='Normal',
                             name='retina_cls',
                             std=0.01,
                             bias_prob=0.01),
                         dict(
                             type='Normal',
                             name='retina_angle_cls',
                             std=0.01,
                             bias_prob=0.01),
                     ]),
                 **kwargs):
        self.angle_coder = build_bbox_coder(angle_coder)
        self.coding_len = self.angle_coder.coding_len
        super(CSLRRetinaHead, self).__init__(**kwargs, init_cfg=init_cfg)
        self.shield_reg_angle = shield_reg_angle
        self.loss_angle = build_loss(loss_angle)
        self.use_encoded_angle = use_encoded_angle

    def _init_layers(self):
        """Initialize layers of the head."""
        super(CSLRRetinaHead, self)._init_layers()
        self.retina_angle_cls = nn.Conv2d(
            self.feat_channels,
            self.num_anchors * self.coding_len,
            3,
            padding=1)
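For orientation, a hedged sketch of how this head is typically wired into a detector config. The detector type and dataset-specific values (e.g. num_classes=15 for a DOTA-style dataset) are illustrative assumptions, not settings taken from this file; only the CSLRRetinaHead-specific keys mirror the defaults above.

# Illustrative config fragment (assumed detector and dataset settings).
model = dict(
    type='RotatedRetinaNet',
    bbox_head=dict(
        type='CSLRRetinaHead',
        num_classes=15,          # assumption: 15 object categories
        in_channels=256,
        feat_channels=256,
        use_encoded_angle=True,  # regress the encoded angle as target
        shield_reg_angle=False,  # keep the angle term in the reg. loss
        angle_coder=dict(
            type='CSLCoder',
            angle_version='le90',
            omega=1,
            window='gaussian',
            radius=6),
        loss_angle=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)))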
    def forward_single(self, x):
        """Forward feature of a single scale level.

        Args:
            x (torch.Tensor): Features of a single scale level.

        Returns:
            tuple (torch.Tensor):

                - cls_score (torch.Tensor): Cls scores for a single scale
                  level, the number of channels is
                  num_anchors * num_classes.
                - bbox_pred (torch.Tensor): Box energies / deltas for a
                  single scale level, the number of channels is
                  num_anchors * 5.
                - angle_cls (torch.Tensor): Angle scores for a single scale
                  level, the number of channels is
                  num_anchors * coding_len.
        """
        cls_feat = x
        reg_feat = x
        for cls_conv in self.cls_convs:
            cls_feat = cls_conv(cls_feat)
        for reg_conv in self.reg_convs:
            reg_feat = reg_conv(reg_feat)
        cls_score = self.retina_cls(cls_feat)
        bbox_pred = self.retina_reg(reg_feat)
        angle_cls = self.retina_angle_cls(reg_feat)
        return cls_score, bbox_pred, angle_cls
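A quick sanity-check sketch of the channel arithmetic behind the three branches. The concrete values (256 feature channels, 9 anchors per location, 15 classes, coding_len=180) are assumptions for illustration; the layers below mimic retina_cls, retina_reg, and retina_angle_cls rather than instantiating the head.

import torch
import torch.nn as nn

# Assumed values: feat_channels=256, 9 anchors, 15 classes, coding_len=180.
feat_channels, num_anchors, num_classes, coding_len = 256, 9, 15, 180
retina_cls = nn.Conv2d(feat_channels, num_anchors * num_classes, 3, padding=1)
retina_reg = nn.Conv2d(feat_channels, num_anchors * 5, 3, padding=1)
retina_angle_cls = nn.Conv2d(
    feat_channels, num_anchors * coding_len, 3, padding=1)

x = torch.rand(1, feat_channels, 32, 32)  # features of one FPN level
assert retina_cls(x).shape == (1, num_anchors * num_classes, 32, 32)
assert retina_reg(x).shape == (1, num_anchors * 5, 32, 32)
assert retina_angle_cls(x).shape == (1, num_anchors * coding_len, 32, 32)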
    def loss_single(self, cls_score, bbox_pred, angle_cls, anchors, labels,
                    label_weights, bbox_targets, bbox_weights, angle_targets,
                    angle_weights, num_total_samples):
        """Compute loss of a single scale level.

        Args:
            cls_score (torch.Tensor): Box scores for each scale level
                with shape (N, num_anchors * num_classes, H, W).
            bbox_pred (torch.Tensor): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W).
            angle_cls (torch.Tensor): Angle classification scores for each
                scale level with shape (N, num_anchors * coding_len, H, W).
            anchors (torch.Tensor): Box reference for each scale level with
                shape (N, num_total_anchors, 5).
            labels (torch.Tensor): Labels of each anchor with shape
                (N, num_total_anchors).
            label_weights (torch.Tensor): Label weights of each anchor with
                shape (N, num_total_anchors).
            bbox_targets (torch.Tensor): BBox regression targets of each
                anchor with shape (N, num_total_anchors, 5).
            bbox_weights (torch.Tensor): BBox regression loss weights of
                each anchor with shape (N, num_total_anchors, 5).
            angle_targets (torch.Tensor): Angle classification targets of
                each anchor with shape (N, num_total_anchors, coding_len).
            angle_weights (torch.Tensor): Angle classification loss weights
                of each anchor with shape (N, num_total_anchors, 1).
            num_total_samples (int): If sampling, num total samples equals
                the number of total anchors; otherwise, it is the number of
                positive anchors.

        Returns:
            tuple (torch.Tensor):

                - loss_cls (torch.Tensor): cls. loss for each scale level.
                - loss_bbox (torch.Tensor): reg. loss for each scale level.
                - loss_angle (torch.Tensor): angle cls. loss for each scale
                  level.
        """
        # Classification loss
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        loss_cls = self.loss_cls(
            cls_score, labels, label_weights, avg_factor=num_total_samples)
        # Regression loss
        bbox_targets = bbox_targets.reshape(-1, 5)
        bbox_weights = bbox_weights.reshape(-1, 5)
        # Shield angle in reg. branch
        if self.shield_reg_angle:
            bbox_weights[:, -1] = 0.
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5)
        if self.reg_decoded_bbox:
            anchors = anchors.reshape(-1, 5)
            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)
        loss_bbox = self.loss_bbox(
            bbox_pred,
            bbox_targets,
            bbox_weights,
            avg_factor=num_total_samples)
        # Angle classification loss
        angle_cls = angle_cls.permute(0, 2, 3, 1).reshape(-1, self.coding_len)
        angle_targets = angle_targets.reshape(-1, self.coding_len)
        angle_weights = angle_weights.reshape(-1, 1)
        loss_angle = self.loss_angle(
            angle_cls,
            angle_targets,
            weight=angle_weights,
            avg_factor=num_total_samples)
        return loss_cls, loss_bbox, loss_angle
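The shield_reg_angle branch simply zeroes the weight of the angle column so the element-wise regression loss ignores the fifth coordinate, leaving the angle entirely to the CSL classification branch. A minimal runnable sketch of that masking step, with toy values assumed:

import torch

# 4 positive anchors, columns (dx, dy, dw, dh, da); toy values.
bbox_weights = torch.ones(4, 5)
bbox_weights[:, -1] = 0.           # shield: angle column no longer counts
per_coord_loss = torch.rand(4, 5)  # stand-in for an element-wise reg. loss
masked = per_coord_loss * bbox_weights
assert masked[:, -1].abs().sum() == 0  # no gradient flows through the angle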
    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses'))
    def loss(self,
             cls_scores,
             bbox_preds,
             angle_clses,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W).
            angle_clses (list[Tensor]): Box angles for each scale level
                with shape (N, num_anchors * coding_len, H, W).
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image
                with shape (num_gts, 5) in [cx, cy, w, h, a] format.
            gt_labels (list[Tensor]): Class indices corresponding to each
                box.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss. Default: None.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels
        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, num_total_pos, num_total_neg, angle_target_list,
         angle_weight_list) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)
        # Anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # Concat all level anchors and flags to a single tensor
        concat_anchor_list = []
        for i, _ in enumerate(anchor_list):
            concat_anchor_list.append(torch.cat(anchor_list[i]))
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        losses_cls, losses_bbox, losses_angle = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            angle_clses,
            all_anchor_list,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            angle_target_list,
            angle_weight_list,
            num_total_samples=num_total_samples)
        return dict(
            loss_cls=losses_cls,
            loss_bbox=losses_bbox,
            loss_angle=losses_angle)
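multi_apply (from mmdet.core) maps loss_single over the per-level tensors, then transposes the list of per-level result tuples into per-output lists. Conceptually it behaves like the simplified sketch below (a reimplementation for illustration, not the mmdet source):

from functools import partial

def multi_apply_sketch(func, *args, **kwargs):
    # Apply func to each per-level tuple of args, then transpose the list
    # of result tuples into a tuple of per-output lists.
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))

# Usage mirrors the call above, e.g.:
# losses_cls, losses_bbox, losses_angle = multi_apply_sketch(
#     self.loss_single, cls_scores, bbox_preds, angle_clses, ...)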
    def _get_targets_single(self,
                            flat_anchors,
                            valid_flags,
                            gt_bboxes,
                            gt_bboxes_ignore,
                            gt_labels,
                            img_meta,
                            label_channels=1,
                            unmap_outputs=True):
        """Compute regression and classification targets for anchors in a
        single image.

        Args:
            flat_anchors (torch.Tensor): Multi-level anchors of the image,
                which are concatenated into a single tensor of shape
                (num_anchors, 5).
            valid_flags (torch.Tensor): Multi level valid flags of the image,
                which are concatenated into a single tensor of shape
                (num_anchors,).
            gt_bboxes (torch.Tensor): Ground truth bboxes of the image,
                shape (num_gts, 5).
            gt_bboxes_ignore (torch.Tensor): Ground truth bboxes to be
                ignored, shape (num_ignored_gts, 5).
            gt_labels (torch.Tensor): Ground truth labels of each box,
                shape (num_gts,).
            img_meta (dict): Meta info of the image.
            label_channels (int): Channel of label. Default: 1.
            unmap_outputs (bool): Whether to map outputs back to the original
                set of anchors. Default: True.

        Returns:
            tuple (torch.Tensor):

                - labels (Tensor): Labels of all anchors in the image.
                - label_weights (Tensor): Label weights of all anchors.
                - bbox_targets (Tensor): BBox targets of all anchors.
                - bbox_weights (Tensor): BBox weights of all anchors.
                - pos_inds (Tensor): Indices of positive anchors.
                - neg_inds (Tensor): Indices of negative anchors.
                - sampling_result (SamplingResult): Assign and sample
                  result.
                - angle_targets (Tensor): Angle targets of all anchors.
                - angle_weights (Tensor): Angle weights of all anchors.
        """
        inside_flags = rotated_anchor_inside_flags(
            flat_anchors, valid_flags, img_meta['img_shape'][:2],
            self.train_cfg.allowed_border)
        if not inside_flags.any():
            return (None, ) * 9
        # Assign gt and sample anchors
        anchors = flat_anchors[inside_flags, :]

        if self.assign_by_circumhbbox is not None:
            gt_bboxes_assign = obb2hbb(gt_bboxes, self.assign_by_circumhbbox)
            assign_result = self.assigner.assign(
                anchors, gt_bboxes_assign, gt_bboxes_ignore,
                None if self.sampling else gt_labels)
        else:
            assign_result = self.assigner.assign(
                anchors, gt_bboxes, gt_bboxes_ignore,
                None if self.sampling else gt_labels)

        sampling_result = self.sampler.sample(assign_result, anchors,
                                              gt_bboxes)

        num_valid_anchors = anchors.shape[0]
        bbox_targets = torch.zeros_like(anchors)
        bbox_weights = torch.zeros_like(anchors)
        angle_targets = torch.zeros_like(bbox_targets[:, 4:5])
        angle_weights = torch.zeros_like(bbox_targets[:, 4:5])
        labels = anchors.new_full((num_valid_anchors, ),
                                  self.num_classes,
                                  dtype=torch.long)
        label_weights = anchors.new_zeros(
            num_valid_anchors, dtype=torch.float)

        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        if len(pos_inds) > 0:
            if not self.reg_decoded_bbox:
                pos_bbox_targets = self.bbox_coder.encode(
                    sampling_result.pos_bboxes,
                    sampling_result.pos_gt_bboxes)
            else:
                pos_bbox_targets = sampling_result.pos_gt_bboxes
            bbox_targets[pos_inds, :] = pos_bbox_targets
            bbox_weights[pos_inds, :] = 1.0

            if self.use_encoded_angle:
                # Get encoded angle as target
                angle_targets[pos_inds, :] = pos_bbox_targets[:, 4:5]
            else:
                # Get gt angle as target
                angle_targets[pos_inds, :] = \
                    sampling_result.pos_gt_bboxes[:, 4:5]
            # Angle encoder
            angle_targets = self.angle_coder.encode(angle_targets)
            angle_weights[pos_inds, :] = 1.0

            if gt_labels is None:
                # Only rpn gives gt_labels as None
                # Foreground is the first class since v2.5.0
                labels[pos_inds] = 0
            else:
                labels[pos_inds] = gt_labels[
                    sampling_result.pos_assigned_gt_inds]
            if self.train_cfg.pos_weight <= 0:
                label_weights[pos_inds] = 1.0
            else:
                label_weights[pos_inds] = self.train_cfg.pos_weight
        if len(neg_inds) > 0:
            label_weights[neg_inds] = 1.0

        # Map up to original set of anchors
        if unmap_outputs:
            num_total_anchors = flat_anchors.size(0)
            labels = unmap(
                labels, num_total_anchors, inside_flags,
                fill=self.num_classes)  # fill bg label
            label_weights = unmap(label_weights, num_total_anchors,
                                  inside_flags)
            bbox_targets = unmap(bbox_targets, num_total_anchors,
                                 inside_flags)
            bbox_weights = unmap(bbox_weights, num_total_anchors,
                                 inside_flags)
            angle_targets = unmap(angle_targets, num_total_anchors,
                                  inside_flags)
            angle_weights = unmap(angle_weights, num_total_anchors,
                                  inside_flags)

        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
                neg_inds, sampling_result, angle_targets, angle_weights)

    def _get_bboxes_single(self,
                           cls_score_list,
                           bbox_pred_list,
                           angle_cls_list,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_score_list (list[Tensor]): Box scores for a single scale
                level with shape (num_anchors * num_classes, H, W).
            bbox_pred_list (list[Tensor]): Box energies / deltas for a
                single scale level with shape (num_anchors * 5, H, W).
            angle_cls_list (list[Tensor]): Angle classification scores for
                a single scale level with shape
                (num_anchors * coding_len, H, W).
            mlvl_anchors (list[Tensor]): Box reference for a single scale
                level with shape (num_total_anchors, 5).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arranged as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            tuple (Tensor, Tensor): Labeled boxes in shape (n, 6), where
                the first 5 columns are bounding box positions
                (cx, cy, w, h, a) and the 6-th column is a score between
                0 and 1, and the corresponding class labels in shape (n,).
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, angle_cls, anchors in zip(
                cls_score_list, bbox_pred_list, angle_cls_list, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            cls_score = cls_score.permute(
                1, 2, 0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
            angle_cls = angle_cls.permute(1, 2, 0).reshape(
                -1, self.coding_len).sigmoid()

            nms_pre = cfg.get('nms_pre', -1)
            if scores.shape[0] > nms_pre > 0:
                # Get maximum scores for foreground classes.
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # Remind that we set FG labels to [0, num_class-1]
                    # since mmdet v2.0
                    # BG cat_id: num_class
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                angle_cls = angle_cls[topk_inds, :]

            # Angle decoder
            angle_pred = self.angle_coder.decode(angle_cls)

            if self.use_encoded_angle:
                bbox_pred[..., -1] = angle_pred
                bboxes = self.bbox_coder.decode(
                    anchors, bbox_pred, max_shape=img_shape)
            else:
                bboxes = self.bbox_coder.decode(
                    anchors, bbox_pred, max_shape=img_shape)
                bboxes[..., -1] = angle_pred

            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            # Angle should not be rescaled
            mlvl_bboxes[:, :4] = mlvl_bboxes[:, :4] / mlvl_bboxes.new_tensor(
                scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        if self.use_sigmoid_cls:
            # Add a dummy background class to the backend when using sigmoid
            # Remind that we set FG labels to [0, num_class-1]
            # since mmdet v2.0
            # BG cat_id: num_class
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        if with_nms:
            det_bboxes, det_labels = multiclass_nms_rotated(
                mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms,
                cfg.max_per_img)
            return det_bboxes, det_labels
        else:
            return mlvl_bboxes, mlvl_scores
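Conceptually, the angle decode step picks the highest-scoring angle bin per anchor and maps the bin index back to a continuous angle. A toy sketch of that idea, assuming omega=1 degree bins over the le90 range [-90, 90); the real CSLCoder additionally handles smoothing windows and version-specific offsets, so this is illustrative only:

import math
import torch

def decode_angle_sketch(angle_cls, omega=1.0):
    # Best bin per anchor -> degrees -> radians (toy mapping, le90 assumed).
    bin_idx = torch.argmax(angle_cls, dim=1).float()
    angle_deg = bin_idx * omega - 90.0
    return angle_deg * math.pi / 180.0

angle_cls = torch.rand(100, 180).sigmoid()  # 100 anchors, 180 angle bins
angle_pred = decode_angle_sketch(angle_cls)
assert angle_pred.shape == (100, )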
    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses'))
    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   angle_clses,
                   img_metas,
                   cfg=None,
                   rescale=False,
                   with_nms=True):
        """Transform network output for a batch into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 5, H, W).
            angle_clses (list[Tensor]): Box angles for each scale level
                with shape (N, num_anchors * coding_len, H, W).
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            list[tuple[Tensor, Tensor]]: Each item in result_list is a
                2-tuple. The first item is an (n, 6) tensor, where the
                first 5 columns are bounding box positions
                (cx, cy, w, h, a) and the 6-th column is a score between
                0 and 1. The second item is an (n,) tensor where each item
                is the predicted class label of the corresponding box.

        Example:
            >>> import mmcv
            >>> self = AnchorHead(
            >>>     num_classes=9,
            >>>     in_channels=1,
            >>>     anchor_generator=dict(
            >>>         type='AnchorGenerator',
            >>>         scales=[8],
            >>>         ratios=[0.5, 1.0, 2.0],
            >>>         strides=[4,]))
            >>> img_metas = [{'img_shape': (32, 32, 3), 'scale_factor': 1}]
            >>> cfg = mmcv.Config(dict(
            >>>     score_thr=0.00,
            >>>     nms=dict(type='nms', iou_thr=1.0),
            >>>     max_per_img=10))
            >>> feat = torch.rand(1, 1, 3, 3)
            >>> cls_score, bbox_pred = self.forward_single(feat)
            >>> # Note the input lists are over different levels, not images
            >>> cls_scores, bbox_preds = [cls_score], [bbox_pred]
            >>> result_list = self.get_bboxes(cls_scores, bbox_preds,
            >>>                               img_metas, cfg)
            >>> det_bboxes, det_labels = result_list[0]
            >>> assert len(result_list) == 1
            >>> assert det_bboxes.shape[1] == 5
            >>> assert len(det_bboxes) == len(det_labels) == cfg.max_per_img
        """
        assert len(cls_scores) == len(bbox_preds)
        num_levels = len(cls_scores)
        device = cls_scores[0].device
        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
        mlvl_anchors = self.anchor_generator.grid_priors(
            featmap_sizes, device=device)

        result_list = []
        for img_id, _ in enumerate(img_metas):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id].detach() for i in range(num_levels)
            ]
            angle_cls_list = [
                angle_clses[i][img_id].detach() for i in range(num_levels)
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
            if with_nms:
                # Some heads don't support with_nms argument
                proposals = self._get_bboxes_single(cls_score_list,
                                                    bbox_pred_list,
                                                    angle_cls_list,
                                                    mlvl_anchors, img_shape,
                                                    scale_factor, cfg,
                                                    rescale)
            else:
                proposals = self._get_bboxes_single(cls_score_list,
                                                    bbox_pred_list,
                                                    angle_cls_list,
                                                    mlvl_anchors, img_shape,
                                                    scale_factor, cfg,
                                                    rescale, with_nms)
            result_list.append(proposals)
        return result_list
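At test time, get_bboxes slices the batched per-level outputs into single-image lists before delegating to _get_bboxes_single. A minimal runnable sketch of that slicing pattern; the batch size, level count, and channel counts below are assumed toy values:

import torch

# Two images, two FPN levels, 9 anchors x 15 classes (assumed values).
cls_scores = [torch.rand(2, 9 * 15, 64, 64), torch.rand(2, 9 * 15, 32, 32)]
num_levels = len(cls_scores)
for img_id in range(cls_scores[0].size(0)):
    # Detach one image's predictions from every level, as get_bboxes does.
    cls_score_list = [
        cls_scores[i][img_id].detach() for i in range(num_levels)
    ]
    assert cls_score_list[0].shape == (9 * 15, 64, 64)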