glenn-jocher committed
Commit 9e8fb9f
Parent(s): 81b3182

Create `utils/augmentations.py` (#3877)

* Create `utils/augmentations.py`
* cleanup

Files changed:
- utils/augmentations.py +244 -0
- utils/datasets.py +6 -235
utils/augmentations.py
ADDED
@@ -0,0 +1,244 @@
# YOLOv5 image augmentation functions

import math
import random

import cv2
import numpy as np

from utils.general import segment2box, resample_segments
from utils.metrics import bbox_ioa

def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    # HSV color-space augmentation
    if hgain or sgain or vgain:
        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
        dtype = im.dtype  # uint8

        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed

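A quick sanity check of the new helper (a sketch, not part of the diff): the gains below are the usual hyp.scratch.yaml values (hsv_h=0.015, hsv_s=0.7, hsv_v=0.4), and the random array is just a stand-in for a real BGR frame.

    import numpy as np
    from utils.augmentations import augment_hsv

    im = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)  # stand-in for a real BGR frame
    augment_hsv(im, hgain=0.015, sgain=0.7, vgain=0.4)  # jitters im in place; returns None
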
def hist_equalize(im, clahe=True, bgr=False):
    # Equalize histogram on BGR/RGB image 'im' with im.shape(n,m,3) and range 0-255
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image to BGR/RGB

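One caller-facing detail worth noting: `bgr` selects the conversion pair, so the default assumes an RGB array and OpenCV-native frames need `bgr=True`. A minimal sketch:

    import numpy as np
    from utils.augmentations import hist_equalize

    im = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    eq = hist_equalize(im, clahe=True, bgr=True)  # CLAHE on the luma (Y) channel of a BGR frame
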
def replicate(im, labels):
    # Replicate labels
    h, w = im.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]  # im[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return im, labels

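replicate pastes copies of the smaller half of the boxes at random positions and appends matching labels. A sketch with two hypothetical pixel-space xyxy labels; with two boxes, exactly one (the smaller) gets replicated:

    import numpy as np
    from utils.augmentations import replicate

    im = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    labels = np.array([[0, 10, 10, 50, 60], [1, 100, 80, 300, 300]], np.float32)  # cls, x1, y1, x2, y2
    im, labels = replicate(im, labels)  # labels grows from 2 to 3 rows
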
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)

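The returned ratio and padding are what the dataloader uses to map labels into the padded frame (x' = x * ratio[0] + dw, y' = y * ratio[1] + dh). A sketch with auto=False, which pads to the exact target size rather than the nearest stride multiple:

    import numpy as np
    from utils.augmentations import letterbox

    im = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    im_lb, ratio, (dw, dh) = letterbox(im, new_shape=640, auto=False)
    print(im_lb.shape, ratio, (dw, dh))  # (640, 640, 3) (1.0, 1.0) (0.0, 80.0)
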
def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
    width = im.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined transformation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(im[:, :, ::-1])  # base
    # ax[1].imshow(im2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        use_segments = any(x.any() for x in segments)
        new = np.zeros((n, 4))
        if use_segments:  # warp segments
            segments = resample_segments(segments)  # upsample
            for i, segment in enumerate(segments):
                xy = np.ones((len(segment), 3))
                xy[:, :2] = segment
                xy = xy @ M.T  # transform
                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine

                # clip
                new[i] = segment2box(xy, width, height)

        else:  # warp boxes
            xy = np.ones((n * 4, 3))
            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = xy @ M.T  # transform
            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # clip
            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return im, targets

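Targets are pixel-space [cls, x1, y1, x2, y2]; boxes that shrink or degenerate under the sampled warp are filtered by box_candidates, so the returned array can have fewer rows than the input. A sketch with a hypothetical single box:

    import numpy as np
    from utils.augmentations import random_perspective

    im = (np.random.rand(640, 640, 3) * 255).astype(np.uint8)
    targets = np.array([[0, 100, 80, 300, 260]], np.float32)  # cls, x1, y1, x2, y2 (pixels)
    im_w, targets_w = random_perspective(im, targets, degrees=10, translate=0.1, scale=0.1, shear=10)
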
def copy_paste(im, labels, segments, probability=0.5):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    if probability and n:
        h, w, c = im.shape  # height, width, channels
        im_new = np.zeros(im.shape, np.uint8)
        for j in random.sample(range(n), k=round(probability * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]  # left-right mirrored box
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        result = cv2.bitwise_and(src1=im, src2=im_new)
        result = cv2.flip(result, 1)  # augment segments (flip left-right)
        i = result > 0  # pixels to replace
        # i[:, :] = result.max(2).reshape(h, w, 1)  # act over ch
        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug

    return im, labels, segments

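copy_paste expects one polygon per label; each accepted paste is the left-right mirror of the source instance, skipped when the mirrored box would obscure more than 30% of an existing label. A toy rectangle segment (hypothetical values), with probability=1.0 so every instance is considered:

    import numpy as np
    from utils.augmentations import copy_paste

    im = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    labels = np.array([[0, 100, 80, 200, 220]], np.float32)  # cls, x1, y1, x2, y2
    segments = [np.array([[100, 80], [200, 80], [200, 220], [100, 220]], np.float32)]  # one polygon
    im, labels, segments = copy_paste(im, labels, segments, probability=1.0)  # labels grows to 2 rows here
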
def cutout(im, labels):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    h, w = im.shape[:2]

    # create random masks
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        mask_h = random.randint(1, int(h * s))
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels

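cutout mutates the image in place and returns only the labels that survive occlusion filtering. A sketch:

    import numpy as np
    from utils.augmentations import cutout

    im = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    labels = np.array([[0, 10, 10, 50, 60]], np.float32)
    labels = cutout(im, labels)  # im now carries random-color holes; >60%-obscured labels are dropped
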
def mixup(im, labels, im2, labels2):
    # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
    im = (im * r + im2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((labels, labels2), 0)
    return im, labels

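Beta(32, 32) concentrates the mixing ratio tightly around 0.5, so the blend is close to an even mix; both images must share a shape. A sketch:

    import numpy as np
    from utils.augmentations import mixup

    im1 = np.full((320, 320, 3), 64, np.uint8)
    l1 = np.array([[0, 10, 10, 100, 100]], np.float32)
    im2 = np.full((320, 320, 3), 192, np.uint8)
    l2 = np.array([[1, 50, 50, 200, 200]], np.float32)
    im, labels = mixup(im1, l1, im2, l2)  # blended uint8 image; labels concatenated to 2 rows
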
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates

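Read the defaults as: keep a post-augment box if both sides exceed 2 px, it retains more than 10% of its pre-augment area, and its aspect ratio stays under 20. A worked pair, transposed into the (4, n) layout the signature expects:

    import numpy as np
    from utils.augmentations import box_candidates

    before = np.array([[0, 0, 100, 100], [0, 0, 100, 100]], np.float32).T  # (4, n) xyxy
    after = np.array([[0, 0, 90, 95], [0, 0, 1, 1]], np.float32).T
    print(box_candidates(before, after))  # [ True False] -- the 1x1 remnant fails wh_thr
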
utils/datasets.py
CHANGED
@@ -1,4 +1,4 @@
-#
+# YOLOv5 dataset utils and dataloaders
 
 import glob
 import hashlib
@@ -14,7 +14,6 @@ from pathlib import Path
 from threading import Thread
 
 import cv2
-import math
 import numpy as np
 import torch
 import torch.nn.functional as F
@@ -23,9 +22,9 @@ from PIL import Image, ExifTags
 from torch.utils.data import Dataset
 from tqdm import tqdm
 
+from utils.augmentations import augment_hsv, copy_paste, letterbox, mixup, random_perspective
 from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \
-    xyn2xy, segment2box, segments2boxes, resample_segments, clean_str
-from utils.metrics import bbox_ioa
+    xyn2xy, segments2boxes, clean_str
 from utils.torch_utils import torch_distributed_zero_first
 
 # Parameters
@@ -523,12 +522,10 @@ class LoadImagesAndLabels(Dataset):  # for training/testing
             img, labels = load_mosaic(self, index)
             shapes = None
 
-            # MixUp
+            # MixUp augmentation
             if random.random() < hyp['mixup']:
-                img2, labels2 = load_mosaic(self, random.randint(0, self.n - 1))
-                r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
-                img = (img * r + img2 * (1 - r)).astype(np.uint8)
-                labels = np.concatenate((labels, labels2), 0)
+                img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))
 
         else:
             # Load image
@@ -639,32 +636,6 @@ def load_image(self, index):
         return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized
 
 
-def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
-    ...  # body unchanged; moved to utils/augmentations.py above (with `img` renamed to `im`)
-
-def hist_equalize(img, clahe=True, bgr=False):
-    ...  # body unchanged; moved to utils/augmentations.py above
-
 def load_mosaic(self, index):
     # loads images in a 4-mosaic
 
@@ -796,205 +767,6 @@ def load_mosaic9(self, index):
     return img9, labels9
 
 
-def replicate(img, labels):
-    ...  # body unchanged; moved to utils/augmentations.py above
-
-def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
-    ...  # body unchanged; moved to utils/augmentations.py above
-
-def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
-                       border=(0, 0)):
-    ...  # body unchanged; moved to utils/augmentations.py above
-
-def copy_paste(img, labels, segments, probability=0.5):
-    ...  # body unchanged; moved to utils/augmentations.py above
-
-def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
-    ...  # body unchanged; moved to utils/augmentations.py above
-
-def cutout(image, labels):
-    ...  # body unchanged; moved to utils/augmentations.py above (with `image` renamed to `im`)
-
 def create_folder(path='./new'):
     # Create folder
     if os.path.exists(path):
@@ -1012,7 +784,6 @@ def flatten_recursive(path='../datasets/coco128'):
 
 def extract_boxes(path='../datasets/coco128'):  # from utils.datasets import *; extract_boxes()
     # Convert detection dataset into classification dataset, with one directory per class
-
     path = Path(path)  # images dir
     shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None  # remove existing
     files = list(path.rglob('*.*'))