Shortcuts

Source code for mmrotate.core.bbox.coder.delta_midpointoffset_rbbox_coder

# Copyright (c) OpenMMLab. All rights reserved.
# Modified from jbwang1997: https://github.com/jbwang1997/OBBDetection
import mmcv
import numpy as np
import torch
from mmdet.core.bbox.coder.base_bbox_coder import BaseBBoxCoder

from ..builder import ROTATED_BBOX_CODERS
from ..transforms import obb2poly, obb2xyxy, poly2obb


@ROTATED_BBOX_CODERS.register_module()
class MidpointOffsetCoder(BaseBBoxCoder):
    """Mid point offset coder.

    Encodes a horizontal box (x1, y1, x2, y2) together with a 5-value
    rotated ground-truth box into a 6-value delta
    (dx, dy, dw, dh, da, db), and decodes such a delta back into a box
    through :func:`delta2bbox`.

    Args:
        target_means (Sequence[float]): Denormalizing means of target for
            delta coordinates.
        target_stds (Sequence[float]): Denormalizing standard deviation of
            target for delta coordinates.
        angle_range (str, optional): Angle representations.
            Defaults to 'oc'.
    """

    def __init__(self,
                 target_means=(0., 0., 0., 0., 0., 0.),
                 target_stds=(1., 1., 1., 1., 1., 1.),
                 angle_range='oc'):
        # Zero-argument super() resolves to BaseBBoxCoder.__init__.
        # The previous ``super(BaseBBoxCoder, self)`` started the MRO lookup
        # *after* BaseBBoxCoder and therefore skipped its initializer.
        super().__init__()
        self.means = target_means
        self.stds = target_stds
        self.version = angle_range

    def encode(self, bboxes, gt_bboxes):
        """Get box regression transformation deltas that can be used to
        transform the ``bboxes`` into the ``gt_bboxes``.

        Args:
            bboxes (torch.Tensor): Source boxes, e.g., object proposals.
                The last dimension must be 4.
            gt_bboxes (torch.Tensor): Target of the transformation, e.g.,
                ground-truth boxes. The last dimension must be 5.

        Returns:
            torch.Tensor: Box transformation deltas.
        """
        assert bboxes.size(0) == gt_bboxes.size(0)
        assert bboxes.size(-1) == 4
        assert gt_bboxes.size(-1) == 5
        encoded_bboxes = bbox2delta(bboxes, gt_bboxes, self.means, self.stds,
                                    self.version)
        return encoded_bboxes

    def decode(self,
               bboxes,
               pred_bboxes,
               max_shape=None,
               wh_ratio_clip=16 / 1000):
        """Apply transformation `pred_bboxes` to `bboxes`.

        Args:
            bboxes (torch.Tensor): Basic boxes. The last dimension must
                be 4.
            pred_bboxes (torch.Tensor): Encoded offsets with respect to
                each roi. The last dimension must be 6, and the first
                dimension must match that of ``bboxes``.
            max_shape (Sequence[int] or torch.Tensor or Sequence[
                Sequence[int]], optional): Accepted for interface
                compatibility only; this implementation does not forward
                it to :func:`delta2bbox`, so no clipping to image bounds
                is performed here.
            wh_ratio_clip (float, optional): The allowed ratio between
                width and height.

        Returns:
            torch.Tensor: Decoded boxes.
        """
        assert pred_bboxes.size(0) == bboxes.size(0)
        assert bboxes.size(-1) == 4
        assert pred_bboxes.size(-1) == 6
        decoded_bboxes = delta2bbox(bboxes, pred_bboxes, self.means,
                                    self.stds, wh_ratio_clip, self.version)
        return decoded_bboxes


@mmcv.jit(coderize=True)
def bbox2delta(proposals,
               gt,
               means=(0., 0., 0., 0., 0., 0.),
               stds=(1., 1., 1., 1., 1., 1.),
               version='oc'):
    """Compute regression deltas of ``proposals`` w.r.t. ``gt``.

    The target has six channels: the usual center/size deltas
    (dx, dy, dw, dh) between the proposal and the horizontal box
    enclosing ``gt``, plus two midpoint offsets (da, db) that locate two
    polygon vertices on that enclosing box. This is the inverse function
    of :func:`delta2bbox`.

    Args:
        proposals (torch.Tensor): Horizontal boxes (x1, y1, x2, y2) to be
            transformed. The polygon handling below indexes dim 1, so
            inputs are effectively expected to be 2-D, i.e. (N, 4).
        gt (torch.Tensor): Rotated gt bboxes used as base, last
            dimension 5.
        means (Sequence[float]): Denormalizing means for delta coordinates.
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates.
        version (str, optional): Angle representations. Defaults to 'oc'.

    Returns:
        Tensor: deltas with shape (N, 6), where columns represent dx, dy,
        dw, dh, da, db.
    """
    proposals = proposals.float()
    gt = gt.float()

    # Center and size of every proposal.
    prop_cx = (proposals[..., 0] + proposals[..., 2]) * 0.5
    prop_cy = (proposals[..., 1] + proposals[..., 3]) * 0.5
    prop_w = proposals[..., 2] - proposals[..., 0]
    prop_h = proposals[..., 3] - proposals[..., 1]

    # Enclosing horizontal box and corner polygon of every gt box.
    enclosing = obb2xyxy(gt, version)
    corners = obb2poly(gt, version)
    gt_cx = (enclosing[..., 0] + enclosing[..., 2]) * 0.5
    gt_cy = (enclosing[..., 1] + enclosing[..., 3]) * 0.5
    gt_w = enclosing[..., 2] - enclosing[..., 0]
    gt_h = enclosing[..., 3] - enclosing[..., 1]

    xs = corners[:, 0::2]
    ys = corners[:, 1::2]
    smallest_y = ys.min(dim=1, keepdim=True)[0]
    largest_x = xs.max(dim=1, keepdim=True)[0]

    # x of the vertex with the smallest y: vertices more than 0.1 away from
    # the minimum are pushed to -1000 so they cannot win the max; ties are
    # resolved towards the larger x.
    not_topmost = torch.abs(ys - smallest_y) > 0.1
    mid_a = xs.masked_fill(not_topmost, -1000).max(dim=1)[0]

    # y of the vertex with the largest x, selected the same way.
    not_rightmost = torch.abs(xs - largest_x) > 0.1
    mid_b = ys.masked_fill(not_rightmost, -1000).max(dim=1)[0]

    targets = [
        (gt_cx - prop_cx) / prop_w,  # dx
        (gt_cy - prop_cy) / prop_h,  # dy
        torch.log(gt_w / prop_w),  # dw
        torch.log(gt_h / prop_h),  # dh
        (mid_a - gt_cx) / gt_w,  # da
        (mid_b - gt_cy) / gt_h,  # db
    ]
    deltas = torch.stack(targets, dim=-1)

    # Normalize in place with the configured statistics.
    mean_row = deltas.new_tensor(means).unsqueeze(0)
    std_row = deltas.new_tensor(stds).unsqueeze(0)
    deltas -= mean_row
    deltas /= std_row

    return deltas


@mmcv.jit(coderize=True)
def delta2bbox(rois,
               deltas,
               means=(0., 0., 0., 0., 0., 0.),
               stds=(1., 1., 1., 1., 1., 1.),
               wh_ratio_clip=16 / 1000,
               version='oc'):
    """Apply deltas to shift/scale base boxes.

    Typically the rois are anchor or proposed bounding boxes and the
    deltas are network outputs used to shift/scale those boxes. This is
    the inverse function of :func:`bbox2delta`.

    Args:
        rois (torch.Tensor): Boxes to be transformed. Has shape (N, 4).
        deltas (torch.Tensor): Encoded offsets relative to each roi.
            Has shape (N, num_classes * 6) or (N, 6); channels are read
            with stride 6 as dx, dy, dw, dh, da, db.
        means (Sequence[float]): Denormalizing means for delta coordinates.
            Default (0., 0., 0., 0., 0., 0.).
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates. Default (1., 1., 1., 1., 1., 1.).
        wh_ratio_clip (float): Bounds ``dw``/``dh`` to
            ``+-abs(log(wh_ratio_clip))``. Default 16 / 1000.
        version (str, optional): Angle representations. Defaults to 'oc'.

    Returns:
        Tensor: Rotated boxes produced by ``poly2obb`` from the
        rectified polygons (presumably cx, cy, w, h, a per box; the
        helper is defined elsewhere, so confirm there).
    """
    groups = deltas.size(1) // 6
    mean_row = deltas.new_tensor(means).repeat(1, groups)
    std_row = deltas.new_tensor(stds).repeat(1, groups)
    denorm = deltas * std_row + mean_row

    # De-interleave the six channels of every group.
    dx, dy, dw, dh, da, db = (denorm[:, k::6] for k in range(6))

    # Bound the log-scale deltas so exp() cannot blow the box up.
    log_limit = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-log_limit, max=log_limit)
    dh = dh.clamp(min=-log_limit, max=log_limit)

    # Center of each roi, broadcast to the delta layout.
    roi_cx = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    roi_cy = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    # Width/height of each roi, broadcast the same way.
    roi_w = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
    roi_h = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)

    # Decoded enclosing horizontal box: exp() rescales, dx/dy shift.
    box_w = roi_w * torch.exp(dw)
    box_h = roi_h * torch.exp(dh)
    box_cx = roi_cx + roi_w * dx
    box_cy = roi_cy + roi_h * dy

    half_w = box_w * 0.5
    half_h = box_h * 0.5
    left = box_cx - half_w
    top = box_cy - half_h
    right = box_cx + half_w
    bottom = box_cy + half_h

    # Midpoint offsets are limited to half of the box extent, which keeps
    # the four vertices on the sides of the enclosing box.
    da = da.clamp(min=-0.5, max=0.5)
    db = db.clamp(min=-0.5, max=0.5)
    shift_a = da * box_w
    shift_b = db * box_h
    top_x = box_cx + shift_a
    bottom_x = box_cx - shift_a
    right_y = box_cy + shift_b
    left_y = box_cy - shift_b

    # Vertices in the order: top side, right side, bottom side, left side.
    polys = torch.stack(
        [top_x, top, right, right_y, bottom_x, bottom, left, left_y],
        dim=-1)
    center = torch.stack([box_cx, box_cy] * 4, dim=-1)

    # Stretch every vertex along its ray from the center until all four
    # are as far out as the farthest one, i.e. give the quadrilateral
    # equal half-diagonals before converting it to a rotated box.
    offsets = polys - center
    off_x = offsets[..., 0::2]
    off_y = offsets[..., 1::2]
    diag_len = torch.sqrt(off_x * off_x + off_y * off_y)
    longest = torch.max(diag_len, dim=-1, keepdim=True)[0]
    stretch = longest / diag_len
    offsets = offsets * stretch.repeat_interleave(2, dim=-1)

    rectpolys = offsets + center
    return poly2obb(rectpolys, version)
Read the Docs v: v0.3.4
Versions
latest
stable
1.x
v1.0.0rc0
v0.3.4
v0.3.3
v0.3.2
v0.3.1
v0.3.0
v0.2.0
v0.1.1
v0.1.0
main
dev
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.