
Source code for mmrotate.datasets.pipelines.transforms

# Copyright (c) OpenMMLab. All rights reserved.
import copy

import cv2
import mmcv
import numpy as np
from mmdet.datasets.pipelines.transforms import (Mosaic, RandomCrop,
                                                 RandomFlip, Resize)
from numpy import random

from mmrotate.core import norm_angle, obb2poly_np, poly2obb_np
from ..builder import ROTATED_PIPELINES

@ROTATED_PIPELINES.register_module()
class RResize(Resize):
    """Resize pipeline for images annotated with rotated bounding boxes.

    Subclasses ``Resize`` and overrides the bbox rescaling step so that
    5-value rotated boxes are handled. The parent is always constructed
    with ``keep_ratio=True``.

    Args:
        img_scale (tuple or list[tuple]): Images scales for resizing.
        multiscale_mode (str): Either "range" or "value".
        ratio_range (tuple[float]): (min_ratio, max_ratio).
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None):
        super().__init__(
            img_scale=img_scale,
            multiscale_mode=multiscale_mode,
            ratio_range=ratio_range,
            keep_ratio=True)

    def _resize_bboxes(self, results):
        """Rescale every bbox field using ``results['scale_factor']``.

        Each field listed in ``results['bbox_fields']`` is viewed as rows of
        5 values. Column 0 is multiplied by the width scale, column 1 by the
        height scale, and columns 2-3 by the geometric mean of both scales.
        Column 4 is left untouched. The multiplication is done in place.
        """
        for field in results.get('bbox_fields', []):
            boxes = results[field]
            shape_before = boxes.shape
            boxes = boxes.reshape((-1, 5))
            scale_w, scale_h, _, _ = results['scale_factor']
            boxes[:, 0] *= scale_w
            boxes[:, 1] *= scale_h
            # A single isotropic factor is applied to both side lengths.
            side_scale = np.sqrt(scale_w * scale_h)
            boxes[:, 2:4] *= side_scale
            results[field] = boxes.reshape(shape_before)
@ROTATED_PIPELINES.register_module()
class RRandomFlip(RandomFlip):
    """Random flip pipeline for rotated bounding boxes.

    Args:
        flip_ratio (float | list[float], optional): The flipping probability.
            Default: None.
        direction(str | list[str], optional): The flipping direction. Options
            are 'horizontal', 'vertical', 'diagonal'.
        version (str, optional): Angle representations. Defaults to 'oc'.
    """

    def __init__(self, flip_ratio=None, direction='horizontal', version='oc'):
        self.version = version
        super().__init__(flip_ratio, direction)

    def bbox_flip(self, bboxes, img_shape, direction):
        """Flip rotated bboxes horizontally, vertically or diagonally.

        Args:
            bboxes(ndarray): shape (..., 5*k)
            img_shape(tuple): (height, width)
            direction (str): One of 'horizontal', 'vertical', 'diagonal'.

        Returns:
            numpy.ndarray: Flipped bounding boxes, same shape as the input.

        Raises:
            ValueError: If ``direction`` is not one of the three options.
        """
        assert bboxes.shape[-1] % 5 == 0
        shape_before = bboxes.shape
        boxes = bboxes.reshape((-1, 5))
        mirrored = boxes.copy()

        mirror_x = direction in ('horizontal', 'diagonal')
        mirror_y = direction in ('vertical', 'diagonal')
        if not (mirror_x or mirror_y):
            raise ValueError(f'Invalid flipping direction "{direction}"')

        if mirror_x:
            mirrored[:, 0] = img_shape[1] - boxes[:, 0] - 1
        if mirror_y:
            mirrored[:, 1] = img_shape[0] - boxes[:, 1] - 1
        if direction == 'diagonal':
            # Only the centres move for a diagonal flip; sizes and angles
            # are returned as they came in.
            return mirrored.reshape(shape_before)

        if self.version == 'oc':
            # Boxes whose angle equals pi/2 are kept as is; all others get
            # the complementary angle and have width/height exchanged.
            turned = boxes[:, 4] != np.pi / 2
            mirrored[turned, 4] = np.pi / 2 - boxes[turned, 4]
            mirrored[turned, 2:4] = boxes[turned, 3:1:-1]
        else:
            mirrored[:, 4] = norm_angle(np.pi - boxes[:, 4], self.version)
        return mirrored.reshape(shape_before)
@ROTATED_PIPELINES.register_module()
class PolyRandomRotate(object):
    """Randomly rotate the image and its rotated bounding boxes.

    Args:
        rotate_ratio (float, optional): The rotating probability.
            Default: 0.5.
        mode (str, optional): Indicates whether the angle is chosen in a
            random range (mode='range') or in a preset list of angles
            (mode='value'). Defaults to 'range'.
        angles_range (int|list[int], optional): The range of angles.
            If mode='range', angles_range is an int and the angle is chosen
            in (-angles_range, +angles_range).
            If mode='value', angles_range is a non-empty list of int and the
            angle is chosen in angles_range.
            Defaults to 180 as default mode is 'range'.
        auto_bound (bool, optional): Whether to find the new width and height
            bounds. Defaults to False.
        rect_classes (None|list, optional): Specifies classes that need to
            be rotated by a multiple of 90 degrees.
        version (str, optional): Angle representations. Defaults to 'le90'.
    """

    def __init__(self,
                 rotate_ratio=0.5,
                 mode='range',
                 angles_range=180,
                 auto_bound=False,
                 rect_classes=None,
                 version='le90'):
        self.rotate_ratio = rotate_ratio
        self.auto_bound = auto_bound
        assert mode in ['range', 'value'], \
            f"mode is supposed to be 'range' or 'value', but got {mode}."
        if mode == 'range':
            assert isinstance(angles_range, int), \
                "mode 'range' expects angle_range to be an int."
        else:
            assert mmcv.is_seq_of(angles_range, int) and len(angles_range), \
                "mode 'value' expects angle_range as a non-empty list of int."
        self.mode = mode
        self.angles_range = angles_range
        # Candidate angles used when an image contains a ``rect_classes``
        # label; the list is shuffled in place on each such call.
        self.discrete_range = [90, 180, -90, -180]
        self.rect_classes = rect_classes
        self.version = version

    @property
    def is_rotate(self):
        """Randomly decide whether to rotate."""
        return np.random.rand() < self.rotate_ratio

    def apply_image(self, img, bound_h, bound_w, interp=cv2.INTER_LINEAR):
        """Warp ``img`` with ``self.rm_image`` into a (bound_h, bound_w) canvas.

        img should be a numpy array, formatted as Height * Width * Nchannels.
        An empty image is returned unchanged.
        """
        if len(img) == 0:
            return img
        return cv2.warpAffine(
            img, self.rm_image, (bound_w, bound_h), flags=interp)

    def apply_coords(self, coords):
        """Transform points with ``self.rm_coords``.

        coords should be a N * 2 array-like, containing N couples of (x, y)
        points. An empty input is returned unchanged.
        """
        if len(coords) == 0:
            return coords
        coords = np.asarray(coords, dtype=float)
        return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :]

    def create_rotation_matrix(self,
                               center,
                               angle,
                               bound_h,
                               bound_w,
                               offset=0):
        """Create the 2x3 affine rotation matrix.

        Args:
            center (array-like): (x, y) rotation center. It is not modified.
            angle (float): Rotation angle in degrees, as passed to
                ``cv2.getRotationMatrix2D``.
            bound_h (int): Height of the output canvas.
            bound_w (int): Width of the output canvas.
            offset (float): Shift added to the center coordinates.
                ``__call__`` uses -0.5 for the image matrix and 0 for the
                coordinate matrix.

        Returns:
            numpy.ndarray: The rotation matrix. When ``self.auto_bound`` is
            set, its translation moves the rotated center to the center of
            the (bound_w, bound_h) canvas.
        """
        # Shift a copy: the previous in-place ``center += offset`` leaked the
        # offset into the caller's array, and the offset was then added a
        # second time before transforming the center below.
        center = np.asarray(center, dtype=float) + offset
        rm = cv2.getRotationMatrix2D(tuple(center), angle, 1)
        if self.auto_bound:
            rot_im_center = cv2.transform(center[None, None, :], rm)[0, 0, :]
            new_center = np.array([bound_w / 2, bound_h / 2
                                   ]) + offset - rot_im_center
            rm[:, 2] += new_center
        return rm

    def filter_border(self, bboxes, h, w):
        """Build a keep-mask for boxes after rotation.

        A box is kept when its center lies strictly inside the (w, h) canvas
        and both of its side lengths are greater than 5.
        """
        x_ctr, y_ctr = bboxes[:, 0], bboxes[:, 1]
        w_bbox, h_bbox = bboxes[:, 2], bboxes[:, 3]
        keep_inds = (x_ctr > 0) & (x_ctr < w) & (y_ctr > 0) & (y_ctr < h) & \
                    (w_bbox > 5) & (h_bbox > 5)
        return keep_inds

    def __call__(self, results):
        """Call function of PolyRandomRotate.

        Args:
            results (dict): Result dict; reads 'img', 'img_shape',
                'gt_bboxes' and 'gt_labels'.

        Returns:
            dict | None: The updated dict with 'rotate', 'rotate_angle',
            'img', 'img_shape', 'gt_bboxes' and 'gt_labels' written, or None
            when no box survives ``filter_border``.
        """
        if not self.is_rotate:
            results['rotate'] = False
            angle = 0
        else:
            results['rotate'] = True
            if self.mode == 'range':
                angle = self.angles_range * (2 * np.random.rand() - 1)
            else:
                i = np.random.randint(len(self.angles_range))
                angle = self.angles_range[i]

            # If any label belongs to ``rect_classes`` the sampled angle is
            # replaced by one drawn from ``discrete_range``.
            if self.rect_classes and any(
                    classid in self.rect_classes
                    for classid in results['gt_labels']):
                np.random.shuffle(self.discrete_range)
                angle = self.discrete_range[0]

        h, w, c = results['img_shape']
        img = results['img']
        results['rotate_angle'] = angle

        image_center = np.array((w / 2, h / 2))
        abs_cos, abs_sin = \
            abs(np.cos(angle / 180 * np.pi)), abs(np.sin(angle / 180 * np.pi))
        if self.auto_bound:
            bound_w, bound_h = np.rint(
                [h * abs_sin + w * abs_cos,
                 h * abs_cos + w * abs_sin]).astype(int)
        else:
            bound_w, bound_h = w, h

        self.rm_coords = self.create_rotation_matrix(image_center, angle,
                                                     bound_h, bound_w)
        self.rm_image = self.create_rotation_matrix(
            image_center, angle, bound_h, bound_w, offset=-0.5)

        img = self.apply_image(img, bound_h, bound_w)
        results['img'] = img
        results['img_shape'] = (bound_h, bound_w, c)

        gt_bboxes = results.get('gt_bboxes', [])
        labels = results.get('gt_labels', [])
        # Append a dummy column before the polygon conversion; the last
        # column of the converted output is dropped again right after.
        gt_bboxes = np.concatenate(
            [gt_bboxes, np.zeros((gt_bboxes.shape[0], 1))], axis=-1)
        polys = obb2poly_np(gt_bboxes, self.version)[:, :-1].reshape(-1, 2)
        polys = self.apply_coords(polys).reshape(-1, 8)

        gt_bboxes = []
        for pt in polys:
            pt = np.array(pt, dtype=np.float32)
            obb = poly2obb_np(pt, self.version)
            if obb is None:
                # An all-zero box is rejected by ``filter_border`` below.
                obb = [0, 0, 0, 0, 0]
            gt_bboxes.append(obb)
        # ``reshape`` keeps the array 2-D even when there are no boxes, so
        # the column indexing in ``filter_border`` stays valid.
        gt_bboxes = np.array(gt_bboxes, dtype=np.float32).reshape(-1, 5)

        keep_inds = self.filter_border(gt_bboxes, bound_h, bound_w)
        gt_bboxes = gt_bboxes[keep_inds, :]
        labels = labels[keep_inds]
        if len(gt_bboxes) == 0:
            return None
        results['gt_bboxes'] = gt_bboxes
        results['gt_labels'] = labels

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        # ``base_angles`` was never an attribute of this class; report the
        # attributes that ``__init__`` actually sets.
        repr_str += f'(rotate_ratio={self.rotate_ratio}, ' \
                    f'mode={self.mode}, ' \
                    f'angles_range={self.angles_range}, ' \
                    f'auto_bound={self.auto_bound}, ' \
                    f'rect_classes={self.rect_classes}, ' \
                    f'version={self.version})'
        return repr_str
@ROTATED_PIPELINES.register_module()
class RRandomCrop(RandomCrop):
    """Randomly crop an image together with its rotated bboxes.

    The absolute `crop_size` is sampled based on `crop_type` and
    `image_size`, then the cropped results are generated.

    Args:
        crop_size (tuple): The relative ratio or absolute pixels of
            height and width.
        crop_type (str, optional): one of "relative_range", "relative",
            "absolute", "absolute_range". "relative" randomly crops
            (h * crop_size[0], w * crop_size[1]) part from an input of size
            (h, w). "relative_range" uniformly samples relative crop size from
            range [crop_size[0], 1] and [crop_size[1], 1] for height and width
            respectively. "absolute" crops from an input with absolute size
            (crop_size[0], crop_size[1]). "absolute_range" uniformly samples
            crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w
            in range [crop_size[0], min(w, crop_size[1])]. Default "absolute".
        allow_negative_crop (bool, optional): Whether to allow a crop that does
            not contain any bbox area. Default False.
        version (str, optional): Angle representations, stored on the
            instance. Defaults to 'oc'.

    Note:
        - If the image is smaller than the absolute crop size, return the
            original image.
        - The keys for bboxes, labels must be aligned. That is, `gt_bboxes`
            corresponds to `gt_labels`, and `gt_bboxes_ignore` corresponds to
            `gt_labels_ignore`.
        - If the crop does not contain any gt-bbox region and
            `allow_negative_crop` is set to False, skip this image.
    """

    def __init__(self,
                 crop_size,
                 crop_type='absolute',
                 allow_negative_crop=False,
                 version='oc'):
        self.version = version
        super().__init__(crop_size, crop_type, allow_negative_crop)

    def _crop_data(self, results, crop_size, allow_negative_crop):
        """Randomly crop the images and shift/filter the rotated bboxes.

        Args:
            results (dict): Result dict from loading pipeline.
            crop_size (tuple): Expected absolute size after cropping, (h, w).
            allow_negative_crop (bool): Whether to allow a crop that does not
                contain any bbox area. Default to False.

        Returns:
            dict | None: Randomly cropped results with the 'img_shape' key
            updated according to the crop size, or None when no `gt_bboxes`
            center falls inside the crop and `allow_negative_crop` is False.
        """
        assert crop_size[0] > 0 and crop_size[1] > 0
        for key in results.get('bbox_fields', []):
            assert results[key].shape[-1] % 5 == 0

        target_h, target_w = crop_size[0], crop_size[1]
        for img_key in results.get('img_fields', ['img']):
            image = results[img_key]
            slack_h = max(image.shape[0] - target_h, 0)
            slack_w = max(image.shape[1] - target_w, 0)
            # Vertical offset is drawn first, then the horizontal one.
            top = np.random.randint(0, slack_h + 1)
            left = np.random.randint(0, slack_w + 1)

            # crop the image
            image = image[top:top + target_h, left:left + target_w, ...]
            cropped_shape = image.shape
            results[img_key] = image
        results['img_shape'] = cropped_shape

        height, width, _ = cropped_shape

        # Only the box centres move with the crop; w, h and angle are kept.
        shift = np.array([left, top, 0, 0, 0], dtype=np.float32)
        for key in results.get('bbox_fields', []):
            # e.g. gt_bboxes and gt_bboxes_ignore
            moved = results[key] - shift
            ctr_x, ctr_y = moved[:, 0], moved[:, 1]
            inside = (ctr_x >= 0) & (ctr_x < width) & (ctr_y >= 0) & (
                ctr_y < height)
            # If the crop does not contain any gt-bbox area and
            # allow_negative_crop is False, skip this image.
            if key == 'gt_bboxes' and not (inside.any()
                                           or allow_negative_crop):
                return None
            results[key] = moved[inside, :]
            # label fields. e.g. gt_labels and gt_labels_ignore
            label_key = self.bbox2label.get(key)
            if label_key in results:
                results[label_key] = results[label_key][inside]

        return results
@ROTATED_PIPELINES.register_module()
class RMosaic(Mosaic):
    """Rotate Mosaic augmentation. Inherit from
    `mmdet.datasets.pipelines.transforms.Mosaic`.

    Given 4 images, mosaic transform combines them into
    one output image. The output image is composed of the parts from each sub-
    image.

    .. code:: text

                        mosaic transform
                           center_x
                +------------------------------+
                |       pad        |  pad      |
                |      +-----------+           |
                |      |           |           |
                |      |  image1   |--------+  |
                |      |           |        |  |
                |      |           | image2 |  |
     center_y   |----+-------------+-----------|
                |    |   cropped   |           |
                |pad |   image3    |  image4   |
                |    |             |           |
                +----|-------------+-----------+
                     |             |
                     +-------------+

     The mosaic transform steps are as follows:

         1. Choose the mosaic center as the intersections of 4 images
         2. Get the left top image according to the index, and randomly
            sample another 3 images from the custom dataset.
         3. Sub image will be cropped if image is larger than mosaic patch

    Args:
        img_scale (Sequence[int]): Image size after mosaic pipeline of single
            image. The shape order should be (height, width).
            Defaults to (640, 640).
        center_ratio_range (Sequence[float]): Center ratio range of mosaic
            output. Defaults to (0.5, 1.5).
        min_bbox_size (int | float): The minimum pixel for filtering
            invalid bboxes after the mosaic pipeline. Defaults to 10.
        bbox_clip_border (bool, optional): Whether to clip the objects outside
            the border of the image. In some dataset like MOT17, the gt bboxes
            are allowed to cross the border of images. Therefore, we don't
            need to clip the gt bboxes in these cases. Defaults to True.
        skip_filter (bool): Whether to skip filtering rules. If it
            is True, the filter rule will not be applied, and the
            `min_bbox_size` is invalid. Defaults to True.
        pad_val (int): Pad value. Defaults to 114.
        prob (float): Probability of applying this transformation.
            Defaults to 1.0.
        version (str, optional): Angle representations. Defaults to `oc`.

    Note:
        The ``_mosaic_transform`` defined in this class always calls
        ``_filter_box_candidates``; it does not consult ``skip_filter`` or
        ``bbox_clip_border`` itself.
    """

    def __init__(self,
                 img_scale=(640, 640),
                 center_ratio_range=(0.5, 1.5),
                 min_bbox_size=10,
                 bbox_clip_border=True,
                 skip_filter=True,
                 pad_val=114,
                 prob=1.0,
                 version='oc'):
        # Keep the angle representation on the instance, as the other
        # rotated pipelines in this module do.
        self.version = version
        # Forward the caller's ``prob``; it used to be hard-coded to 1.0
        # here, which silently discarded the argument.
        super(RMosaic, self).__init__(
            img_scale=img_scale,
            center_ratio_range=center_ratio_range,
            min_bbox_size=min_bbox_size,
            bbox_clip_border=bbox_clip_border,
            skip_filter=skip_filter,
            pad_val=pad_val,
            prob=prob)

    def _mosaic_transform(self, results):
        """Mosaic transform function.

        Args:
            results (dict): Result dict. Must contain 'mix_results' holding
                the three extra samples.

        Returns:
            dict | None: Updated result dict, or None when no box is left
            after filtering.
        """
        assert 'mix_results' in results
        mosaic_labels = []
        mosaic_bboxes = []

        # The canvas is twice ``img_scale`` in each dimension; a 3-D input
        # gets a 3-channel canvas.
        canvas_shape = (int(self.img_scale[0] * 2),
                        int(self.img_scale[1] * 2))
        if len(results['img'].shape) == 3:
            canvas_shape += (3, )
        mosaic_img = np.full(
            canvas_shape, self.pad_val, dtype=results['img'].dtype)

        # mosaic center x, y
        center_x = int(
            random.uniform(*self.center_ratio_range) * self.img_scale[1])
        center_y = int(
            random.uniform(*self.center_ratio_range) * self.img_scale[0])
        center_position = (center_x, center_y)

        loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
        for i, loc in enumerate(loc_strs):
            if loc == 'top_left':
                results_patch = copy.deepcopy(results)
            else:
                results_patch = copy.deepcopy(results['mix_results'][i - 1])

            img_i = results_patch['img']
            h_i, w_i = img_i.shape[:2]
            # keep_ratio resize
            scale_ratio_i = min(self.img_scale[0] / h_i,
                                self.img_scale[1] / w_i)
            img_i = mmcv.imresize(
                img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)))

            # compute the combine parameters
            paste_coord, crop_coord = self._mosaic_combine(
                loc, center_position, img_i.shape[:2][::-1])
            x1_p, y1_p, x2_p, y2_p = paste_coord
            x1_c, y1_c, x2_c, y2_c = crop_coord

            # crop and paste image
            mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c]

            # adjust coordinate
            gt_bboxes_i = results_patch['gt_bboxes']
            gt_labels_i = results_patch['gt_labels']

            if gt_bboxes_i.shape[0] > 0:
                padw = x1_p - x1_c
                padh = y1_p - y1_c
                # Centres are scaled and shifted; side lengths are only
                # scaled; the angle column is left unchanged.
                gt_bboxes_i[:, 0] = \
                    scale_ratio_i * gt_bboxes_i[:, 0] + padw
                gt_bboxes_i[:, 1] = \
                    scale_ratio_i * gt_bboxes_i[:, 1] + padh
                gt_bboxes_i[:, 2:4] = \
                    scale_ratio_i * gt_bboxes_i[:, 2:4]

            mosaic_bboxes.append(gt_bboxes_i)
            mosaic_labels.append(gt_labels_i)

        if len(mosaic_labels) > 0:
            mosaic_bboxes = np.concatenate(mosaic_bboxes, 0)
            mosaic_labels = np.concatenate(mosaic_labels, 0)

            mosaic_bboxes, mosaic_labels = \
                self._filter_box_candidates(
                    mosaic_bboxes, mosaic_labels,
                    2 * self.img_scale[1], 2 * self.img_scale[0]
                )
        # If results after rmosaic does not contain any valid gt-bbox,
        # return None. And transform flows in MultiImageMixDataset will
        # repeat until existing valid gt-bbox.
        if len(mosaic_bboxes) == 0:
            return None

        results['img'] = mosaic_img
        results['img_shape'] = mosaic_img.shape
        results['gt_bboxes'] = mosaic_bboxes
        results['gt_labels'] = mosaic_labels

        return results

    def _filter_box_candidates(self, bboxes, labels, w, h):
        """Filter out small bboxes and outside bboxes after Mosaic.

        A box is kept when its center lies strictly inside the (w, h) canvas
        and both side lengths exceed ``self.min_bbox_size``.

        Returns:
            tuple: The kept ``bboxes`` and the matching ``labels``.
        """
        bbox_x, bbox_y, bbox_w, bbox_h = \
            bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
        valid_inds = (bbox_x > 0) & (bbox_x < w) & \
                     (bbox_y > 0) & (bbox_y < h) & \
                     (bbox_w > self.min_bbox_size) & \
                     (bbox_h > self.min_bbox_size)
        valid_inds = np.nonzero(valid_inds)[0]
        return bboxes[valid_inds], labels[valid_inds]
Read the Docs v: v0.3.3
On Read the Docs
Project Home

Free document hosting provided by Read the Docs.