|
|
|
import numpy as np |
|
import torch |
|
|
|
|
|
def find_inside_bboxes(bboxes, img_h, img_w): |
|
"""Find bboxes as long as a part of bboxes is inside the image. |
|
|
|
Args: |
|
bboxes (Tensor): Shape (N, 4). |
|
img_h (int): Image height. |
|
img_w (int): Image width. |
|
|
|
Returns: |
|
        Tensor: A boolean mask of shape (N, ), True for bboxes that are at

            least partially inside the image.
|
""" |
|
    inside_inds = ((bboxes[:, 0] < img_w) & (bboxes[:, 2] > 0)

                   & (bboxes[:, 1] < img_h) & (bboxes[:, 3] > 0))
|
return inside_inds |
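

# Minimal usage sketch added for illustration; the `_example_*` helper below
# is hypothetical and not part of the original API. It shows that the function
# returns a boolean mask over the N input boxes.
def _example_find_inside_bboxes():
    bboxes = torch.tensor([[10., 10., 20., 20.],
                           [-30., -30., -10., -10.]])
    # The second box lies entirely outside a 100x100 image.
    mask = find_inside_bboxes(bboxes, img_h=100, img_w=100)
    assert mask.tolist() == [True, False]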
|
|
|
|
|
def bbox_flip(bboxes, img_shape, direction='horizontal'): |
|
"""Flip bboxes horizontally or vertically. |
|
|
|
Args: |
|
bboxes (Tensor): Shape (..., 4*k) |
|
img_shape (tuple): Image shape. |
|
direction (str): Flip direction, options are "horizontal", "vertical", |
|
"diagonal". Default: "horizontal" |
|
|
|
Returns: |
|
Tensor: Flipped bboxes. |
|
""" |
|
assert bboxes.shape[-1] % 4 == 0 |
|
assert direction in ['horizontal', 'vertical', 'diagonal'] |
|
flipped = bboxes.clone() |
|
if direction == 'horizontal': |
|
flipped[..., 0::4] = img_shape[1] - bboxes[..., 2::4] |
|
flipped[..., 2::4] = img_shape[1] - bboxes[..., 0::4] |
|
elif direction == 'vertical': |
|
flipped[..., 1::4] = img_shape[0] - bboxes[..., 3::4] |
|
flipped[..., 3::4] = img_shape[0] - bboxes[..., 1::4] |
|
else: |
|
flipped[..., 0::4] = img_shape[1] - bboxes[..., 2::4] |
|
flipped[..., 1::4] = img_shape[0] - bboxes[..., 3::4] |
|
flipped[..., 2::4] = img_shape[1] - bboxes[..., 0::4] |
|
flipped[..., 3::4] = img_shape[0] - bboxes[..., 1::4] |
|
return flipped |
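

# Illustrative sketch (the helper name is hypothetical, not part of the
# original module): a horizontal flip mirrors x-coordinates about the image
# width while leaving y-coordinates untouched.
def _example_bbox_flip():
    bboxes = torch.tensor([[10., 5., 30., 25.]])
    img_shape = (50, 100)  # (height, width)
    flipped = bbox_flip(bboxes, img_shape, direction='horizontal')
    assert flipped.tolist() == [[70., 5., 90., 25.]]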
|
|
|
|
|
def bbox_mapping(bboxes, |
|
img_shape, |
|
scale_factor, |
|
flip, |
|
flip_direction='horizontal'): |
|
"""Map bboxes from the original image scale to testing scale.""" |
|
new_bboxes = bboxes * bboxes.new_tensor(scale_factor) |
|
if flip: |
|
new_bboxes = bbox_flip(new_bboxes, img_shape, flip_direction) |
|
return new_bboxes |
|
|
|
|
|
def bbox_mapping_back(bboxes, |
|
img_shape, |
|
scale_factor, |
|
flip, |
|
flip_direction='horizontal'): |
|
"""Map bboxes from testing scale to original image scale.""" |
|
new_bboxes = bbox_flip(bboxes, img_shape, |
|
flip_direction) if flip else bboxes |
|
new_bboxes = new_bboxes.view(-1, 4) / new_bboxes.new_tensor(scale_factor) |
|
return new_bboxes.view(bboxes.shape) |
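

# Illustrative sketch (hypothetical helper, not part of the original module):
# bbox_mapping followed by bbox_mapping_back with the same arguments should
# recover the original boxes.
def _example_bbox_mapping_round_trip():
    bboxes = torch.tensor([[10., 10., 50., 40.]])
    img_shape = (200, 300)  # testing-scale (height, width)
    scale_factor = (2.0, 2.0, 2.0, 2.0)
    mapped = bbox_mapping(bboxes, img_shape, scale_factor, flip=True)
    restored = bbox_mapping_back(mapped, img_shape, scale_factor, flip=True)
    assert torch.allclose(restored, bboxes)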
|
|
|
|
|
def bbox2roi(bbox_list): |
|
"""Convert a list of bboxes to roi format. |
|
|
|
Args: |
|
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch |
|
of images. |
|
|
|
Returns: |
|
Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2] |
|
""" |
|
rois_list = [] |
|
for img_id, bboxes in enumerate(bbox_list): |
|
if bboxes.size(0) > 0: |
|
img_inds = bboxes.new_full((bboxes.size(0), 1), img_id) |
|
rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1) |
|
else: |
|
rois = bboxes.new_zeros((0, 5)) |
|
rois_list.append(rois) |
|
rois = torch.cat(rois_list, 0) |
|
return rois |
|
|
|
|
|
def roi2bbox(rois): |
|
"""Convert rois to bounding box format. |
|
|
|
Args: |
|
rois (torch.Tensor): RoIs with the shape (n, 5) where the first |
|
column indicates batch id of each RoI. |
|
|
|
Returns: |
|
list[torch.Tensor]: Converted boxes of corresponding rois. |
|
""" |
|
bbox_list = [] |
|
img_ids = torch.unique(rois[:, 0].cpu(), sorted=True) |
|
for img_id in img_ids: |
|
inds = (rois[:, 0] == img_id.item()) |
|
bbox = rois[inds, 1:] |
|
bbox_list.append(bbox) |
|
return bbox_list |
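

# Illustrative sketch (hypothetical helper, not part of the original module):
# bbox2roi prefixes each box with its batch index, and roi2bbox splits the
# flat RoI tensor back into per-image boxes.
def _example_roi_round_trip():
    bbox_list = [
        torch.tensor([[10., 10., 20., 20.]]),
        torch.tensor([[30., 30., 60., 60.], [5., 5., 15., 15.]])
    ]
    rois = bbox2roi(bbox_list)
    assert rois.shape == (3, 5)  # [batch_ind, x1, y1, x2, y2] per row
    recovered = roi2bbox(rois)
    assert torch.equal(recovered[0], bbox_list[0])
    assert torch.equal(recovered[1], bbox_list[1])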
|
|
|
|
|
def bbox2result(bboxes, labels, num_classes): |
|
"""Convert detection results to a list of numpy arrays. |
|
|
|
Args: |
|
bboxes (torch.Tensor | np.ndarray): shape (n, 5) |
|
labels (torch.Tensor | np.ndarray): shape (n, ) |
|
        num_classes (int): class number, i.e. the length of the returned list
|
|
|
Returns: |
|
        list[ndarray]: bbox results of each class
|
""" |
|
if bboxes.shape[0] == 0: |
|
return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)] |
|
else: |
|
if isinstance(bboxes, torch.Tensor): |
|
bboxes = bboxes.detach().cpu().numpy() |
|
labels = labels.detach().cpu().numpy() |
|
return [bboxes[labels == i, :] for i in range(num_classes)] |
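

# Illustrative sketch (hypothetical helper, not part of the original module):
# boxes are grouped into one numpy array per class, indexed by label.
def _example_bbox2result():
    bboxes = torch.tensor([[10., 10., 20., 20., 0.9],
                           [30., 30., 40., 40., 0.8]])
    labels = torch.tensor([0, 2])
    results = bbox2result(bboxes, labels, num_classes=3)
    assert len(results) == 3
    assert results[0].shape == (1, 5) and results[1].shape == (0, 5)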
|
|
|
|
|
def distance2bbox(points, distance, max_shape=None): |
|
"""Decode distance prediction to bounding box. |
|
|
|
Args: |
|
points (Tensor): Shape (B, N, 2) or (N, 2). |
|
distance (Tensor): Distance from the given point to 4 |
|
boundaries (left, top, right, bottom). Shape (B, N, 4) or (N, 4) |
|
        max_shape (Sequence[int] or torch.Tensor or Sequence[

            Sequence[int]], optional): Maximum bounds for boxes, specified

            as (H, W, C) or (H, W). If the inputs are batched, i.e. distance

            has shape (B, N, 4), then max_shape should be a

            Sequence[Sequence[int]] of length B.
|
|
|
Returns: |
|
Tensor: Boxes with shape (N, 4) or (B, N, 4) |
|
""" |
|
|
|
x1 = points[..., 0] - distance[..., 0] |
|
y1 = points[..., 1] - distance[..., 1] |
|
x2 = points[..., 0] + distance[..., 2] |
|
y2 = points[..., 1] + distance[..., 3] |
|
|
|
bboxes = torch.stack([x1, y1, x2, y2], -1) |
|
|
|
if max_shape is not None: |
|
if bboxes.dim() == 2 and not torch.onnx.is_in_onnx_export(): |
|
|
|
bboxes[:, 0::2].clamp_(min=0, max=max_shape[1]) |
|
bboxes[:, 1::2].clamp_(min=0, max=max_shape[0]) |
|
return bboxes |
|
|
|
|
|
if torch.onnx.is_in_onnx_export(): |
|
from mmdet.core.export import dynamic_clip_for_onnx |
|
x1, y1, x2, y2 = dynamic_clip_for_onnx(x1, y1, x2, y2, max_shape) |
|
bboxes = torch.stack([x1, y1, x2, y2], dim=-1) |
|
return bboxes |
|
if not isinstance(max_shape, torch.Tensor): |
|
max_shape = x1.new_tensor(max_shape) |
|
max_shape = max_shape[..., :2].type_as(x1) |
|
if max_shape.ndim == 2: |
|
assert bboxes.ndim == 3 |
|
assert max_shape.size(0) == bboxes.size(0) |
|
|
|
min_xy = x1.new_tensor(0) |
|
max_xy = torch.cat([max_shape, max_shape], |
|
dim=-1).flip(-1).unsqueeze(-2) |
|
bboxes = torch.where(bboxes < min_xy, min_xy, bboxes) |
|
bboxes = torch.where(bboxes > max_xy, max_xy, bboxes) |
|
|
|
return bboxes |
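

# Illustrative sketch (hypothetical helper, not part of the original module):
# with max_shape given, decoded boxes are clipped to the image boundaries.
def _example_distance2bbox_clip():
    points = torch.tensor([[5., 5.]])
    distance = torch.tensor([[10., 10., 10., 10.]])
    bboxes = distance2bbox(points, distance, max_shape=(20, 20))
    assert bboxes.tolist() == [[0., 0., 15., 15.]]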
|
|
|
|
|
def bbox2distance(points, bbox, max_dis=None, eps=0.1): |
|
"""Decode bounding box based on distances. |
|
|
|
Args: |
|
points (Tensor): Shape (n, 2), [x, y]. |
|
bbox (Tensor): Shape (n, 4), "xyxy" format |
|
max_dis (float): Upper bound of the distance. |
|
        eps (float): a small value to ensure target < max_dis, instead of <=.
|
|
|
Returns: |
|
        Tensor: Encoded distances with shape (n, 4).
|
""" |
|
left = points[:, 0] - bbox[:, 0] |
|
top = points[:, 1] - bbox[:, 1] |
|
right = bbox[:, 2] - points[:, 0] |
|
bottom = bbox[:, 3] - points[:, 1] |
|
if max_dis is not None: |
|
left = left.clamp(min=0, max=max_dis - eps) |
|
top = top.clamp(min=0, max=max_dis - eps) |
|
right = right.clamp(min=0, max=max_dis - eps) |
|
bottom = bottom.clamp(min=0, max=max_dis - eps) |
|
return torch.stack([left, top, right, bottom], -1) |
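

# Illustrative sketch (hypothetical helper, not part of the original module):
# bbox2distance and distance2bbox are inverses of each other when no clamping
# (max_dis / max_shape) is applied.
def _example_distance_round_trip():
    points = torch.tensor([[15., 15.], [50., 60.]])
    bboxes = torch.tensor([[10., 10., 20., 20.], [40., 40., 80., 90.]])
    distances = bbox2distance(points, bboxes)
    assert torch.allclose(distance2bbox(points, distances), bboxes)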
|
|
|
|
|
def bbox_rescale(bboxes, scale_factor=1.0): |
|
"""Rescale bounding box w.r.t. scale_factor. |
|
|
|
Args: |
|
bboxes (Tensor): Shape (n, 4) for bboxes or (n, 5) for rois |
|
scale_factor (float): rescale factor |
|
|
|
Returns: |
|
Tensor: Rescaled bboxes. |
|
""" |
|
if bboxes.size(1) == 5: |
|
bboxes_ = bboxes[:, 1:] |
|
inds_ = bboxes[:, 0] |
|
else: |
|
bboxes_ = bboxes |
|
cx = (bboxes_[:, 0] + bboxes_[:, 2]) * 0.5 |
|
cy = (bboxes_[:, 1] + bboxes_[:, 3]) * 0.5 |
|
w = bboxes_[:, 2] - bboxes_[:, 0] |
|
h = bboxes_[:, 3] - bboxes_[:, 1] |
|
w = w * scale_factor |
|
h = h * scale_factor |
|
x1 = cx - 0.5 * w |
|
x2 = cx + 0.5 * w |
|
y1 = cy - 0.5 * h |
|
y2 = cy + 0.5 * h |
|
if bboxes.size(1) == 5: |
|
rescaled_bboxes = torch.stack([inds_, x1, y1, x2, y2], dim=-1) |
|
else: |
|
rescaled_bboxes = torch.stack([x1, y1, x2, y2], dim=-1) |
|
return rescaled_bboxes |
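

# Illustrative sketch (hypothetical helper, not part of the original module):
# rescaling enlarges a box around its centre, so a 20x20 box with
# scale_factor=2.0 becomes a 40x40 box with the same centre.
def _example_bbox_rescale():
    bboxes = torch.tensor([[10., 10., 30., 30.]])
    rescaled = bbox_rescale(bboxes, scale_factor=2.0)
    assert rescaled.tolist() == [[0., 0., 40., 40.]]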
|
|
|
|
|
def bbox_cxcywh_to_xyxy(bbox): |
|
"""Convert bbox coordinates from (cx, cy, w, h) to (x1, y1, x2, y2). |
|
|
|
Args: |
|
bbox (Tensor): Shape (n, 4) for bboxes. |
|
|
|
Returns: |
|
Tensor: Converted bboxes. |
|
""" |
|
cx, cy, w, h = bbox.split((1, 1, 1, 1), dim=-1) |
|
bbox_new = [(cx - 0.5 * w), (cy - 0.5 * h), (cx + 0.5 * w), (cy + 0.5 * h)] |
|
return torch.cat(bbox_new, dim=-1) |
|
|
|
|
|
def bbox_xyxy_to_cxcywh(bbox): |
|
"""Convert bbox coordinates from (x1, y1, x2, y2) to (cx, cy, w, h). |
|
|
|
Args: |
|
bbox (Tensor): Shape (n, 4) for bboxes. |
|
|
|
Returns: |
|
Tensor: Converted bboxes. |
|
""" |
|
x1, y1, x2, y2 = bbox.split((1, 1, 1, 1), dim=-1) |
|
bbox_new = [(x1 + x2) / 2, (y1 + y2) / 2, (x2 - x1), (y2 - y1)] |
|
return torch.cat(bbox_new, dim=-1) |
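

# Illustrative sketch (hypothetical helper, not part of the original module):
# the two coordinate conversions are exact inverses of each other.
def _example_cxcywh_round_trip():
    xyxy = torch.tensor([[10., 10., 30., 50.]])
    cxcywh = bbox_xyxy_to_cxcywh(xyxy)
    assert cxcywh.tolist() == [[20., 30., 20., 40.]]
    assert torch.allclose(bbox_cxcywh_to_xyxy(cxcywh), xyxy)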
|
|