# Copyright (c) OpenMMLab. All rights reserved.
import math

import cv2
import mmcv
import numpy as np
import torchvision.transforms as transforms
from mmdet.core import BitmapMasks, PolygonMasks
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines.transforms import Resize
from PIL import Image
from shapely.geometry import Polygon as plg

import mmocr.core.evaluation.utils as eval_utils
from mmocr.utils import check_argument

@PIPELINES.register_module()
class RandomCropInstances:
    """Randomly crop images and make sure to contain text instances.

    Args:
        target_size (tuple or int): (height, width)
        instance_key (str): The key of the instance masks in ``results``
            used to guide the crop.
        mask_type (str): 'inx0' uses only the first instance mask as the
            positive region; 'union_all' uses the union of all instance
            masks.
        positive_sample_ratio (float): The probability of sampling regions
            that go through positive regions.
    """

    def __init__(
            self,
            target_size,
            instance_key,
            mask_type='inx0',  # 'inx0' or 'union_all'
            positive_sample_ratio=5.0 / 8.0):

        assert mask_type in ['inx0', 'union_all']

        self.mask_type = mask_type
        self.instance_key = instance_key
        self.positive_sample_ratio = positive_sample_ratio
        self.target_size = target_size if (target_size is None or isinstance(
            target_size, tuple)) else (target_size, target_size)

    def sample_offset(self, img_gt, img_size):
        h, w = img_size
        t_h, t_w = self.target_size

        # clamp the target size if it is bigger than the original size
        t_h = t_h if t_h < h else h
        t_w = t_w if t_w < w else w
        if (img_gt is not None
                and np.random.random_sample() < self.positive_sample_ratio
                and np.max(img_gt) > 0):

            # make sure to crop the positive region

            # the minimum top left to crop positive region (h, w)
            tl = np.min(np.where(img_gt > 0), axis=1) - (t_h, t_w)
            tl[tl < 0] = 0
            # the maximum top left to crop positive region
            br = np.max(np.where(img_gt > 0), axis=1) - (t_h, t_w)
            br[br < 0] = 0
            # if br is too big, the crop would fall outside of the image
            br[0] = min(br[0], h - t_h)
            br[1] = min(br[1], w - t_w)

            h = np.random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
            w = np.random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
        else:
            # make sure not to crop outside of img
            h = np.random.randint(0, h - t_h) if h - t_h > 0 else 0
            w = np.random.randint(0, w - t_w) if w - t_w > 0 else 0

        return (h, w)

    @staticmethod
    def crop_img(img, offset, target_size):
        h, w = img.shape[:2]
        br = np.min(
            np.stack((np.array(offset) + np.array(target_size), np.array(
                (h, w)))),
            axis=0)
        return img[offset[0]:br[0], offset[1]:br[1]], np.array(
            [offset[1], offset[0], br[1], br[0]])

    def crop_bboxes(self, bboxes, canvas_bbox):
        kept_bboxes = []
        kept_inx = []
        canvas_poly = eval_utils.box2polygon(canvas_bbox)
        tl = canvas_bbox[0:2]

        for idx, bbox in enumerate(bboxes):
            poly = eval_utils.box2polygon(bbox)
            area, inters = eval_utils.poly_intersection(
                poly, canvas_poly, return_poly=True)
            if area == 0:
                continue
            xmin, ymin, xmax, ymax = inters.bounds
            kept_bboxes += [
                np.array(
                    [xmin - tl[0], ymin - tl[1], xmax - tl[0], ymax - tl[1]],
                    dtype=np.float32)
            ]
            kept_inx += [idx]

        if len(kept_inx) == 0:
            return np.array([]).astype(np.float32).reshape(0, 4), kept_inx

        return np.stack(kept_bboxes), kept_inx

    @staticmethod
    def generate_mask(gt_mask, type):
        if type == 'inx0':
            return gt_mask.masks[0]
        if type == 'union_all':
            mask = gt_mask.masks[0].copy()
            for idx in range(1, len(gt_mask.masks)):
                mask = np.logical_or(mask, gt_mask.masks[idx])
            return mask

        raise NotImplementedError

    def __call__(self, results):
        gt_mask = results[self.instance_key]
        mask = None
        if len(gt_mask.masks) > 0:
            mask = self.generate_mask(gt_mask, self.mask_type)
        results['crop_offset'] = self.sample_offset(mask,
                                                    results['img'].shape[:2])

        # crop img. bbox = [x1, y1, x2, y2]
        img, bbox = self.crop_img(results['img'], results['crop_offset'],
                                  self.target_size)
        results['img'] = img
        img_shape = img.shape
        results['img_shape'] = img_shape

        # crop masks
        for key in results.get('mask_fields', []):
            results[key] = results[key].crop(bbox)

        # for mask rcnn
        for key in results.get('bbox_fields', []):
            results[key], kept_inx = self.crop_bboxes(results[key], bbox)
            if key == 'gt_bboxes':
                # filter gt_labels accordingly
                if 'gt_labels' in results:
                    ori_labels = results['gt_labels']
                    ori_inst_num = len(ori_labels)
                    results['gt_labels'] = [
                        ori_labels[idx] for idx in range(ori_inst_num)
                        if idx in kept_inx
                    ]
                # filter gt_masks accordingly
                if 'gt_masks' in results:
                    ori_mask = results['gt_masks'].masks
                    kept_mask = [
                        ori_mask[idx] for idx in range(ori_inst_num)
                        if idx in kept_inx
                    ]
                    target_h, target_w = bbox[3] - bbox[1], bbox[2] - bbox[0]
                    if len(kept_inx) > 0:
                        kept_mask = np.stack(kept_mask)
                    else:
                        kept_mask = np.empty((0, target_h, target_w),
                                             dtype=np.float32)
                    results['gt_masks'] = BitmapMasks(kept_mask, target_h,
                                                      target_w)

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
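
# Usage sketch (illustrative, not part of the original file): in an
# mmdet/mmocr-style config this transform is placed in the training pipeline,
# e.g.
#     dict(type='RandomCropInstances', target_size=(640, 640),
#          instance_key='gt_kernels')
# The key name and target size above are assumptions for illustration only.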

@PIPELINES.register_module()
class RandomRotateTextDet:
    """Randomly rotate images."""

    def __init__(self, rotate_ratio=1.0, max_angle=10):
        self.rotate_ratio = rotate_ratio
        self.max_angle = max_angle

    @staticmethod
    def sample_angle(max_angle):
        angle = np.random.random_sample() * 2 * max_angle - max_angle
        return angle

    @staticmethod
    def rotate_img(img, angle):
        h, w = img.shape[:2]
        rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
        img_target = cv2.warpAffine(
            img, rotation_matrix, (w, h), flags=cv2.INTER_NEAREST)
        assert img_target.shape == img.shape
        return img_target

    def __call__(self, results):
        if np.random.random_sample() < self.rotate_ratio:
            # rotate imgs
            results['rotated_angle'] = self.sample_angle(self.max_angle)
            img = self.rotate_img(results['img'], results['rotated_angle'])
            results['img'] = img
            img_shape = img.shape
            results['img_shape'] = img_shape

            # rotate masks
            for key in results.get('mask_fields', []):
                masks = results[key].masks
                mask_list = []
                for m in masks:
                    rotated_m = self.rotate_img(m, results['rotated_angle'])
                    mask_list.append(rotated_m)
                results[key] = BitmapMasks(mask_list, *(img_shape[:2]))

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
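
# Usage sketch (illustrative): the transform can also be called directly on a
# results dict that already holds 'img' and BitmapMasks under 'mask_fields':
#     rotate = RandomRotateTextDet(rotate_ratio=1.0, max_angle=10)
#     results = rotate(results)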

@PIPELINES.register_module()
class ColorJitter:
    """An interface for torch color jitter so that it can be invoked in
    mmdetection pipeline."""

    def __init__(self, **kwargs):
        self.transform = transforms.ColorJitter(**kwargs)

    def __call__(self, results):
        # img is bgr; convert to rgb for PIL, then back to bgr afterwards
        img = results['img'][..., ::-1]
        img = Image.fromarray(img)
        img = self.transform(img)
        img = np.asarray(img)
        img = img[..., ::-1]
        results['img'] = img
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
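
# Usage sketch (illustrative): the wrapped transform is
# torchvision.transforms.ColorJitter, so keyword arguments are forwarded
# as-is, e.g. a pipeline entry such as
#     dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5)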

@PIPELINES.register_module()
class ScaleAspectJitter(Resize):
    """Resize image and segmentation mask encoded by coordinates.

    Allowed resize types are `around_min_img_scale`, `long_short_bound`, and
    `indep_sample_in_range`.
    """

    def __init__(self,
                 img_scale=None,
                 multiscale_mode='range',
                 ratio_range=None,
                 keep_ratio=False,
                 resize_type='around_min_img_scale',
                 aspect_ratio_range=None,
                 long_size_bound=None,
                 short_size_bound=None,
                 scale_range=None):
        super().__init__(
            img_scale=img_scale,
            multiscale_mode=multiscale_mode,
            ratio_range=ratio_range,
            keep_ratio=keep_ratio)
        assert not keep_ratio
        assert resize_type in [
            'around_min_img_scale', 'long_short_bound', 'indep_sample_in_range'
        ]
        self.resize_type = resize_type

        if resize_type == 'indep_sample_in_range':
            assert ratio_range is None
            assert aspect_ratio_range is None
            assert short_size_bound is None
            assert long_size_bound is None
            assert scale_range is not None
        else:
            assert scale_range is None
            assert isinstance(ratio_range, tuple)
            assert isinstance(aspect_ratio_range, tuple)
            assert check_argument.equal_len(ratio_range, aspect_ratio_range)

            if resize_type in ['long_short_bound']:
                assert short_size_bound is not None
                assert long_size_bound is not None

        self.aspect_ratio_range = aspect_ratio_range
        self.long_size_bound = long_size_bound
        self.short_size_bound = short_size_bound
        self.scale_range = scale_range

    @staticmethod
    def sample_from_range(range):
        assert len(range) == 2
        min_value, max_value = min(range), max(range)
        value = np.random.random_sample() * (max_value - min_value) + min_value

        return value

    def _random_scale(self, results):

        if self.resize_type == 'indep_sample_in_range':
            w = self.sample_from_range(self.scale_range)
            h = self.sample_from_range(self.scale_range)
            results['scale'] = (int(w), int(h))  # (w, h)
            results['scale_idx'] = None
            return

        h, w = results['img'].shape[0:2]
        if self.resize_type == 'long_short_bound':
            scale1 = 1
            if max(h, w) > self.long_size_bound:
                scale1 = self.long_size_bound / max(h, w)
            scale2 = self.sample_from_range(self.ratio_range)
            scale = scale1 * scale2
            if min(h, w) * scale <= self.short_size_bound:
                scale = (self.short_size_bound + 10) * 1.0 / min(h, w)
        elif self.resize_type == 'around_min_img_scale':
            short_size = min(self.img_scale[0])
            ratio = self.sample_from_range(self.ratio_range)
            scale = (ratio * short_size) / min(h, w)
        else:
            raise NotImplementedError

        aspect = self.sample_from_range(self.aspect_ratio_range)
        h_scale = scale * math.sqrt(aspect)
        w_scale = scale / math.sqrt(aspect)

        results['scale'] = (int(w * w_scale), int(h * h_scale))  # (w, h)
        results['scale_idx'] = None
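
# Usage sketch (illustrative values, not taken from this file): with the
# default 'around_min_img_scale' resize type, a pipeline entry could look like
#     dict(type='ScaleAspectJitter', img_scale=[(3000, 640)],
#          ratio_range=(0.7, 1.3), aspect_ratio_range=(0.9, 1.1),
#          multiscale_mode='value', keep_ratio=False)
# where the short side (640 here) is jittered by ratio_range and the aspect
# ratio by aspect_ratio_range.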

@PIPELINES.register_module()
class AffineJitter:
    """An interface for torchvision random affine so that it can be invoked in
    mmdet pipeline."""

    def __init__(self,
                 degrees=4,
                 translate=(0.02, 0.04),
                 scale=(0.9, 1.1),
                 shear=None,
                 resample=False,
                 fillcolor=0):
        self.transform = transforms.RandomAffine(
            degrees=degrees,
            translate=translate,
            scale=scale,
            shear=shear,
            resample=resample,
            fillcolor=fillcolor)

    def __call__(self, results):
        # img is bgr; convert to rgb for PIL, then back to bgr afterwards
        img = results['img'][..., ::-1]
        img = Image.fromarray(img)
        img = self.transform(img)
        img = np.asarray(img)
        img = img[..., ::-1]
        results['img'] = img
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
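
# Usage sketch (illustrative): the keyword arguments map directly onto
# torchvision.transforms.RandomAffine, e.g.
#     dict(type='AffineJitter', degrees=4, translate=(0.02, 0.04),
#          scale=(0.75, 1.0))
# Note that `resample`/`fillcolor` follow the older torchvision API; newer
# torchvision releases rename them to `interpolation`/`fill`.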

@PIPELINES.register_module()
class RandomCropPolyInstances:
    """Randomly crop images and make sure to contain at least one intact
    instance."""

    def __init__(self,
                 instance_key='gt_masks',
                 crop_ratio=5.0 / 8.0,
                 min_side_ratio=0.4):
        super().__init__()
        self.instance_key = instance_key
        self.crop_ratio = crop_ratio
        self.min_side_ratio = min_side_ratio

    def sample_valid_start_end(self, valid_array, min_len, max_start, min_end):

        assert isinstance(min_len, int)
        assert len(valid_array) > min_len

        start_array = valid_array.copy()
        max_start = min(len(start_array) - min_len, max_start)
        start_array[max_start:] = 0
        start_array[0] = 1
        diff_array = np.hstack([0, start_array]) - np.hstack([start_array, 0])
        region_starts = np.where(diff_array < 0)[0]
        region_ends = np.where(diff_array > 0)[0]
        region_ind = np.random.randint(0, len(region_starts))
        start = np.random.randint(region_starts[region_ind],
                                  region_ends[region_ind])

        end_array = valid_array.copy()
        min_end = max(start + min_len, min_end)
        end_array[:min_end] = 0
        end_array[-1] = 1
        diff_array = np.hstack([0, end_array]) - np.hstack([end_array, 0])
        region_starts = np.where(diff_array < 0)[0]
        region_ends = np.where(diff_array > 0)[0]
        region_ind = np.random.randint(0, len(region_starts))
        end = np.random.randint(region_starts[region_ind],
                                region_ends[region_ind])
        return start, end

    def sample_crop_box(self, img_size, results):
        """Generate crop box and make sure not to crop the polygon instances.

        Args:
            img_size (tuple(int)): The image size (h, w).
            results (dict): The results dict.
        """
        assert isinstance(img_size, tuple)
        h, w = img_size[:2]

        key_masks = results[self.instance_key].masks
        x_valid_array = np.ones(w, dtype=np.int32)
        y_valid_array = np.ones(h, dtype=np.int32)

        selected_mask = key_masks[np.random.randint(0, len(key_masks))]
        selected_mask = selected_mask[0].reshape((-1, 2)).astype(np.int32)
        max_x_start = max(np.min(selected_mask[:, 0]) - 2, 0)
        min_x_end = min(np.max(selected_mask[:, 0]) + 3, w - 1)
        max_y_start = max(np.min(selected_mask[:, 1]) - 2, 0)
        min_y_end = min(np.max(selected_mask[:, 1]) + 3, h - 1)

        for key in results.get('mask_fields', []):
            if len(results[key].masks) == 0:
                continue
            masks = results[key].masks
            for mask in masks:
                assert len(mask) == 1
                mask = mask[0].reshape((-1, 2)).astype(np.int32)
                clip_x = np.clip(mask[:, 0], 0, w - 1)
                clip_y = np.clip(mask[:, 1], 0, h - 1)
                min_x, max_x = np.min(clip_x), np.max(clip_x)
                min_y, max_y = np.min(clip_y), np.max(clip_y)
                x_valid_array[min_x - 2:max_x + 3] = 0
                y_valid_array[min_y - 2:max_y + 3] = 0

        min_w = int(w * self.min_side_ratio)
        min_h = int(h * self.min_side_ratio)

        x1, x2 = self.sample_valid_start_end(x_valid_array, min_w, max_x_start,
                                             min_x_end)
        y1, y2 = self.sample_valid_start_end(y_valid_array, min_h, max_y_start,
                                             min_y_end)

        return np.array([x1, y1, x2, y2])

    def crop_img(self, img, bbox):
        assert img.ndim == 3
        h, w, _ = img.shape
        assert 0 <= bbox[1] < bbox[3] <= h
        assert 0 <= bbox[0] < bbox[2] <= w
        return img[bbox[1]:bbox[3], bbox[0]:bbox[2]]

    def __call__(self, results):
        if len(results[self.instance_key].masks) < 1:
            return results
        if np.random.random_sample() < self.crop_ratio:
            crop_box = self.sample_crop_box(results['img'].shape, results)
            results['crop_region'] = crop_box
            img = self.crop_img(results['img'], crop_box)
            results['img'] = img
            results['img_shape'] = img.shape

            # crop and filter masks
            x1, y1, x2, y2 = crop_box
            w = max(x2 - x1, 1)
            h = max(y2 - y1, 1)
            labels = results['gt_labels']
            valid_labels = []
            for key in results.get('mask_fields', []):
                if len(results[key].masks) == 0:
                    continue
                results[key] = results[key].crop(crop_box)
                # filter out polygons beyond crop box.
                masks = results[key].masks
                valid_masks_list = []

                for ind, mask in enumerate(masks):
                    assert len(mask) == 1
                    polygon = mask[0].reshape((-1, 2))
                    if (polygon[:, 0] > -4).all() and (
                            polygon[:, 0] < w + 4).all() and (
                                polygon[:, 1] > -4).all() and (
                                    polygon[:, 1] < h + 4).all():
                        mask[0][::2] = np.clip(mask[0][::2], 0, w)
                        mask[0][1::2] = np.clip(mask[0][1::2], 0, h)
                        if key == self.instance_key:
                            valid_labels.append(labels[ind])
                        valid_masks_list.append(mask)

                results[key] = PolygonMasks(valid_masks_list, h, w)
            results['gt_labels'] = np.array(valid_labels)

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
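
# Usage sketch (illustrative): for polygon-based detectors this transform is
# usually placed after the polygon annotations have been loaded, e.g.
#     dict(type='RandomCropPolyInstances', instance_key='gt_masks',
#          crop_ratio=0.65, min_side_ratio=0.3)
# The numeric values above are example assumptions, not recommended settings.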

@PIPELINES.register_module()
class RandomRotatePolyInstances:

    def __init__(self,
                 rotate_ratio=0.5,
                 max_angle=10,
                 pad_with_fixed_color=False,
                 pad_value=(0, 0, 0)):
        """Randomly rotate images and polygon masks.

        Args:
            rotate_ratio (float): The ratio of samples to operate rotation.
            max_angle (int): The maximum rotation angle.
            pad_with_fixed_color (bool): The flag for whether to pad the
                rotated image with a fixed value. If set to False, the border
                is filled with a resized random crop of the original image
                instead.
            pad_value (tuple(int)): The color value for padding rotated image.
        """
        self.rotate_ratio = rotate_ratio
        self.max_angle = max_angle
        self.pad_with_fixed_color = pad_with_fixed_color
        self.pad_value = pad_value

    def rotate(self, center, points, theta, center_shift=(0, 0)):
        # rotate points.
        (center_x, center_y) = center
        center_y = -center_y
        x, y = points[::2], points[1::2]
        y = -y

        theta = theta / 180 * math.pi
        cos = math.cos(theta)
        sin = math.sin(theta)

        x = (x - center_x)
        y = (y - center_y)

        _x = center_x + x * cos - y * sin + center_shift[0]
        _y = -(center_y + x * sin + y * cos) + center_shift[1]

        points[::2], points[1::2] = _x, _y
        return points

    def cal_canvas_size(self, ori_size, degree):
        assert isinstance(ori_size, tuple)
        angle = degree * math.pi / 180.0
        h, w = ori_size[:2]

        cos = math.cos(angle)
        sin = math.sin(angle)
        canvas_h = int(w * math.fabs(sin) + h * math.fabs(cos))
        canvas_w = int(w * math.fabs(cos) + h * math.fabs(sin))

        canvas_size = (canvas_h, canvas_w)
        return canvas_size

    def sample_angle(self, max_angle):
        angle = np.random.random_sample() * 2 * max_angle - max_angle
        return angle

    def rotate_img(self, img, angle, canvas_size):
        h, w = img.shape[:2]
        rotation_matrix = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
        rotation_matrix[0, 2] += int((canvas_size[1] - w) / 2)
        rotation_matrix[1, 2] += int((canvas_size[0] - h) / 2)

        if self.pad_with_fixed_color:
            target_img = cv2.warpAffine(
                img,
                rotation_matrix, (canvas_size[1], canvas_size[0]),
                flags=cv2.INTER_NEAREST,
                borderValue=self.pad_value)
        else:
            mask = np.zeros_like(img)
            (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
                              np.random.randint(0, w * 7 // 8))
            img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
            img_cut = mmcv.imresize(img_cut, (canvas_size[1], canvas_size[0]))
            mask = cv2.warpAffine(
                mask,
                rotation_matrix, (canvas_size[1], canvas_size[0]),
                borderValue=[1, 1, 1])
            target_img = cv2.warpAffine(
                img,
                rotation_matrix, (canvas_size[1], canvas_size[0]),
                borderValue=[0, 0, 0])
            target_img = target_img + img_cut * mask
        return target_img

    def __call__(self, results):
        if np.random.random_sample() < self.rotate_ratio:
            img = results['img']
            h, w = img.shape[:2]
            angle = self.sample_angle(self.max_angle)
            canvas_size = self.cal_canvas_size((h, w), angle)
            center_shift = (int(
                (canvas_size[1] - w) / 2), int((canvas_size[0] - h) / 2))

            # rotate image
            results['rotated_poly_angle'] = angle
            img = self.rotate_img(img, angle, canvas_size)
            results['img'] = img
            img_shape = img.shape
            results['img_shape'] = img_shape

            # rotate polygons
            for key in results.get('mask_fields', []):
                if len(results[key].masks) == 0:
                    continue
                masks = results[key].masks
                rotated_masks = []
                for mask in masks:
                    rotated_mask = self.rotate((w / 2, h / 2), mask[0], angle,
                                               center_shift)
                    rotated_masks.append([rotated_mask])

                results[key] = PolygonMasks(rotated_masks, *(img_shape[:2]))

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
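
# Usage sketch (illustrative): a common pairing is to rotate first and then
# square-pad, e.g.
#     dict(type='RandomRotatePolyInstances', rotate_ratio=0.5, max_angle=60,
#          pad_with_fixed_color=False)
# The angle above is an example assumption; the class default is 10 degrees.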

@PIPELINES.register_module()
class SquareResizePad:

    def __init__(self,
                 target_size,
                 pad_ratio=0.6,
                 pad_with_fixed_color=False,
                 pad_value=(0, 0, 0)):
        """Resize or pad images to be square shape.

        Args:
            target_size (int): The target size of square shaped image.
            pad_ratio (float): The probability of resizing with the aspect
                ratio kept and then padding to a square, rather than resizing
                both sides to ``target_size`` directly.
            pad_with_fixed_color (bool): The flag for whether to pad the
                rescaled image with a fixed value. If set to False, the
                padding is filled with a resized random crop of the image
                instead.
            pad_value (tuple(int)): The color value for padding.
        """
        assert isinstance(target_size, int)
        assert isinstance(pad_ratio, float)
        assert isinstance(pad_with_fixed_color, bool)
        assert isinstance(pad_value, tuple)

        self.target_size = target_size
        self.pad_ratio = pad_ratio
        self.pad_with_fixed_color = pad_with_fixed_color
        self.pad_value = pad_value

    def resize_img(self, img, keep_ratio=True):
        h, w, _ = img.shape
        if keep_ratio:
            t_h = self.target_size if h >= w else int(h * self.target_size / w)
            t_w = self.target_size if h <= w else int(w * self.target_size / h)
        else:
            t_h = t_w = self.target_size
        img = mmcv.imresize(img, (t_w, t_h))
        return img, (t_h, t_w)

    def square_pad(self, img):
        h, w = img.shape[:2]
        if h == w:
            return img, (0, 0)
        pad_size = max(h, w)
        if self.pad_with_fixed_color:
            expand_img = np.ones((pad_size, pad_size, 3), dtype=np.uint8)
            expand_img[:] = self.pad_value
        else:
            (h_ind, w_ind) = (np.random.randint(0, h * 7 // 8),
                              np.random.randint(0, w * 7 // 8))
            img_cut = img[h_ind:(h_ind + h // 9), w_ind:(w_ind + w // 9)]
            expand_img = mmcv.imresize(img_cut, (pad_size, pad_size))
        if h > w:
            y0, x0 = 0, (h - w) // 2
        else:
            y0, x0 = (w - h) // 2, 0
        expand_img[y0:y0 + h, x0:x0 + w] = img
        offset = (x0, y0)

        return expand_img, offset

    def square_pad_mask(self, points, offset):
        x0, y0 = offset
        pad_points = points.copy()
        pad_points[::2] = pad_points[::2] + x0
        pad_points[1::2] = pad_points[1::2] + y0
        return pad_points

    def __call__(self, results):
        img = results['img']

        if np.random.random_sample() < self.pad_ratio:
            img, out_size = self.resize_img(img, keep_ratio=True)
            img, offset = self.square_pad(img)
        else:
            img, out_size = self.resize_img(img, keep_ratio=False)
            offset = (0, 0)

        results['img'] = img
        results['img_shape'] = img.shape

        for key in results.get('mask_fields', []):
            if len(results[key].masks) == 0:
                continue
            results[key] = results[key].resize(out_size)
            masks = results[key].masks
            processed_masks = []
            for mask in masks:
                square_pad_mask = self.square_pad_mask(mask[0], offset)
                processed_masks.append([square_pad_mask])

            results[key] = PolygonMasks(processed_masks, *(img.shape[:2]))

        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        return repr_str
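
# Usage sketch (illustrative):
#     dict(type='SquareResizePad', target_size=800, pad_ratio=0.6)
# produces an 800x800 image either by direct resizing or by keep-ratio
# resizing followed by padding, with polygon masks shifted accordingly.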

@PIPELINES.register_module()
class RandomScaling:

    def __init__(self, size=800, scale=(3. / 4, 5. / 2)):
        """Randomly scale the image while keeping the aspect ratio.

        Args:
            size (int): Base size before scaling.
            scale (tuple(float)): The range of scaling.
        """
        assert isinstance(size, int)
        assert isinstance(scale, float) or isinstance(scale, tuple)
        self.size = size
        self.scale = scale if isinstance(scale, tuple) \
            else (1 - scale, 1 + scale)

    def __call__(self, results):
        image = results['img']
        h, w, _ = results['img_shape']

        aspect_ratio = np.random.uniform(min(self.scale), max(self.scale))
        scales = self.size * 1.0 / max(h, w) * aspect_ratio
        scales = np.array([scales, scales])
        out_size = (int(h * scales[1]), int(w * scales[0]))
        image = mmcv.imresize(image, out_size[::-1])

        results['img'] = image
        results['img_shape'] = image.shape

        for key in results.get('mask_fields', []):
            if len(results[key].masks) == 0:
                continue
            results[key] = results[key].resize(out_size)

        return results
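
# Usage sketch (illustrative):
#     dict(type='RandomScaling', size=800, scale=(0.75, 2.5))
# rescales the longer image side to roughly size * factor, with factor drawn
# uniformly from `scale`, and resizes the polygon masks to match.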

@PIPELINES.register_module()
class RandomCropFlip:

    def __init__(self,
                 pad_ratio=0.1,
                 crop_ratio=0.5,
                 iter_num=1,
                 min_area_ratio=0.2):
        """Random crop and flip a patch of the image.

        Args:
            pad_ratio (float): The ratio of padding, relative to the image
                size, used when generating the valid crop range.
            crop_ratio (float): The ratio of cropping.
            iter_num (int): Number of operations.
            min_area_ratio (float): Minimal area ratio between cropped patch
                and original image.
        """
        assert isinstance(crop_ratio, float)
        assert isinstance(iter_num, int)
        assert isinstance(min_area_ratio, float)

        self.pad_ratio = pad_ratio
        self.epsilon = 1e-2
        self.crop_ratio = crop_ratio
        self.iter_num = iter_num
        self.min_area_ratio = min_area_ratio

    def __call__(self, results):
        for i in range(self.iter_num):
            results = self.random_crop_flip(results)

        return results

    def random_crop_flip(self, results):
        image = results['img']
        polygons = results['gt_masks'].masks
        ignore_polygons = results['gt_masks_ignore'].masks
        all_polygons = polygons + ignore_polygons
        if len(polygons) == 0:
            return results

        if np.random.random() >= self.crop_ratio:
            return results

        h, w, _ = results['img_shape']
        area = h * w
        pad_h = int(h * self.pad_ratio)
        pad_w = int(w * self.pad_ratio)
        h_axis, w_axis = self.generate_crop_target(image, all_polygons, pad_h,
                                                   pad_w)
        if len(h_axis) == 0 or len(w_axis) == 0:
            return results

        attempt = 0
        while attempt < 10:
            attempt += 1
            polys_keep = []
            polys_new = []
            ign_polys_keep = []
            ign_polys_new = []
            xx = np.random.choice(w_axis, size=2)
            xmin = np.min(xx) - pad_w
            xmax = np.max(xx) - pad_w
            xmin = np.clip(xmin, 0, w - 1)
            xmax = np.clip(xmax, 0, w - 1)
            yy = np.random.choice(h_axis, size=2)
            ymin = np.min(yy) - pad_h
            ymax = np.max(yy) - pad_h
            ymin = np.clip(ymin, 0, h - 1)
            ymax = np.clip(ymax, 0, h - 1)
            if (xmax - xmin) * (ymax - ymin) < area * self.min_area_ratio:
                # area too small
                continue

            pts = np.stack([[xmin, xmax, xmax, xmin],
                            [ymin, ymin, ymax, ymax]]).T.astype(np.int32)
            pp = plg(pts)
            fail_flag = False
            for polygon in polygons:
                ppi = plg(polygon[0].reshape(-1, 2))
                ppiou = eval_utils.poly_intersection(ppi, pp)
                if np.abs(ppiou - float(ppi.area)) > self.epsilon and \
                        np.abs(ppiou) > self.epsilon:
                    fail_flag = True
                    break
                elif np.abs(ppiou - float(ppi.area)) < self.epsilon:
                    polys_new.append(polygon)
                else:
                    polys_keep.append(polygon)

            for polygon in ignore_polygons:
                ppi = plg(polygon[0].reshape(-1, 2))
                ppiou = eval_utils.poly_intersection(ppi, pp)
                if np.abs(ppiou - float(ppi.area)) > self.epsilon and \
                        np.abs(ppiou) > self.epsilon:
                    fail_flag = True
                    break
                elif np.abs(ppiou - float(ppi.area)) < self.epsilon:
                    ign_polys_new.append(polygon)
                else:
                    ign_polys_keep.append(polygon)

            if fail_flag:
                continue
            else:
                break

        cropped = image[ymin:ymax, xmin:xmax, :]
        select_type = np.random.randint(3)
        if select_type == 0:
            img = np.ascontiguousarray(cropped[:, ::-1])
        elif select_type == 1:
            img = np.ascontiguousarray(cropped[::-1, :])
        else:
            img = np.ascontiguousarray(cropped[::-1, ::-1])
        image[ymin:ymax, xmin:xmax, :] = img
        results['img'] = image

        if len(polys_new) + len(ign_polys_new) != 0:
            height, width, _ = cropped.shape
            if select_type == 0:
                for idx, polygon in enumerate(polys_new):
                    poly = polygon[0].reshape(-1, 2)
                    poly[:, 0] = width - poly[:, 0] + 2 * xmin
                    polys_new[idx] = [poly.reshape(-1, )]
                for idx, polygon in enumerate(ign_polys_new):
                    poly = polygon[0].reshape(-1, 2)
                    poly[:, 0] = width - poly[:, 0] + 2 * xmin
                    ign_polys_new[idx] = [poly.reshape(-1, )]
            elif select_type == 1:
                for idx, polygon in enumerate(polys_new):
                    poly = polygon[0].reshape(-1, 2)
                    poly[:, 1] = height - poly[:, 1] + 2 * ymin
                    polys_new[idx] = [poly.reshape(-1, )]
                for idx, polygon in enumerate(ign_polys_new):
                    poly = polygon[0].reshape(-1, 2)
                    poly[:, 1] = height - poly[:, 1] + 2 * ymin
                    ign_polys_new[idx] = [poly.reshape(-1, )]
            else:
                for idx, polygon in enumerate(polys_new):
                    poly = polygon[0].reshape(-1, 2)
                    poly[:, 0] = width - poly[:, 0] + 2 * xmin
                    poly[:, 1] = height - poly[:, 1] + 2 * ymin
                    polys_new[idx] = [poly.reshape(-1, )]
                for idx, polygon in enumerate(ign_polys_new):
                    poly = polygon[0].reshape(-1, 2)
                    poly[:, 0] = width - poly[:, 0] + 2 * xmin
                    poly[:, 1] = height - poly[:, 1] + 2 * ymin
                    ign_polys_new[idx] = [poly.reshape(-1, )]
            polygons = polys_keep + polys_new
            ignore_polygons = ign_polys_keep + ign_polys_new
            results['gt_masks'] = PolygonMasks(polygons, *(image.shape[:2]))
            results['gt_masks_ignore'] = PolygonMasks(ignore_polygons,
                                                      *(image.shape[:2]))

        return results

    def generate_crop_target(self, image, all_polys, pad_h, pad_w):
        """Generate crop target and make sure not to crop the polygon
        instances.

        Args:
            image (ndarray): The image to be cropped.
            all_polys (list[list[ndarray]]): All polygons including ground
                truth polygons and ground truth ignored polygons.
            pad_h (int): Padding length of height.
            pad_w (int): Padding length of width.

        Returns:
            h_axis (ndarray): Vertical cropping range.
            w_axis (ndarray): Horizontal cropping range.
        """
        h, w, _ = image.shape
        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)

        text_polys = []
        for polygon in all_polys:
            rect = cv2.minAreaRect(polygon[0].astype(np.int32).reshape(-1, 2))
            box = cv2.boxPoints(rect)
            box = np.int0(box)
            text_polys.append([box[0], box[1], box[2], box[3]])

        polys = np.array(text_polys, dtype=np.int32)
        for poly in polys:
            poly = np.round(poly, decimals=0).astype(np.int32)
            minx = np.min(poly[:, 0])
            maxx = np.max(poly[:, 0])
            w_array[minx + pad_w:maxx + pad_w] = 1
            miny = np.min(poly[:, 1])
            maxy = np.max(poly[:, 1])
            h_array[miny + pad_h:maxy + pad_h] = 1

        h_axis = np.where(h_array == 0)[0]
        w_axis = np.where(w_array == 0)[0]
        return h_axis, w_axis
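
# Usage sketch (illustrative):
#     dict(type='RandomCropFlip', crop_ratio=0.5, iter_num=1,
#          min_area_ratio=0.2)
# samples a patch whose border crosses no text polygon, flips it horizontally,
# vertically, or both, and mirrors any polygons fully contained in the patch.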

@PIPELINES.register_module()
class PyramidRescale:
    """Resize the image to the base shape, downsample it with gaussian
    pyramid, and rescale it back to original size.

    Adapted from https://github.com/FangShancheng/ABINet.

    Args:
        factor (int): The decay factor from base size, or the number of
            downsampling operations from the base layer.
        base_shape (tuple(int)): The shape (width, height) of the base layer
            of the pyramid.
        randomize_factor (bool): If True, the final factor would be a random
            integer in [0, factor].

    :Required Keys:
        - | ``img`` (ndarray): The input image.

    :Affected Keys:
        :Modified:
            - | ``img`` (ndarray): The modified image.
    """

    def __init__(self, factor=4, base_shape=(128, 512), randomize_factor=True):
        assert isinstance(factor, int)
        assert isinstance(base_shape, list) or isinstance(base_shape, tuple)
        assert len(base_shape) == 2
        assert isinstance(randomize_factor, bool)

        self.factor = factor if not randomize_factor else np.random.randint(
            0, factor + 1)
        self.base_w, self.base_h = base_shape

    def __call__(self, results):
        assert 'img' in results
        if self.factor == 0:
            return results
        img = results['img']
        src_h, src_w = img.shape[:2]
        scale_img = mmcv.imresize(img, (self.base_w, self.base_h))
        for _ in range(self.factor):
            scale_img = cv2.pyrDown(scale_img)
        scale_img = mmcv.imresize(scale_img, (src_w, src_h))
        results['img'] = scale_img
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(factor={self.factor}, '
        repr_str += f'base_w={self.base_w}, base_h={self.base_h})'
        return repr_str
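
# Usage sketch (illustrative):
#     dict(type='PyramidRescale', factor=4, base_shape=(128, 512))
# blurs the image by `factor` pyrDown steps at the base resolution and resizes
# it back to the source size, simulating low-quality inputs.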