committed on
Create `utils/` (#3877)
Browse files* Create `utils/`
* cleanup
- utils/ +244 -0
- utils/ +6 -235
@@ -0,0 +1,244 @@
1 |
# YOLOv5 image augmentation functions
2 |
3 |
import random
4 |
5 |
import cv2
6 |
import math
7 |
import numpy as np
8 |
9 |
from utils.general import segment2box, resample_segments
10 |
from utils.metrics import bbox_ioa
11 |
12 |
13 |
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly jitter the hue, saturation and value of a BGR image, in place.

    Args:
        im: BGR image (converted with cv2.COLOR_BGR2HSV); it is overwritten with the result.
        hgain: maximum fractional change of the hue gain.
        sgain: maximum fractional change of the saturation gain.
        vgain: maximum fractional change of the value gain.

    Returns:
        None. The augmented image is written back into ``im``.
    """
    if not (hgain or sgain or vgain):
        return  # every gain is disabled, so the image is left untouched

    # One multiplicative gain per channel, each drawn uniformly from [1 - gain, 1 + gain]
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1
    h_chan, s_chan, v_chan = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
    out_dtype = im.dtype  # uint8

    # Lookup tables covering every possible 8-bit channel value
    levels = np.arange(0, 256, dtype=gains.dtype)
    hue_table = ((levels * gains[0]) % 180).astype(out_dtype)  # hue wraps around at 180
    sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    jittered = cv2.merge((cv2.LUT(h_chan, hue_table), cv2.LUT(s_chan, sat_table), cv2.LUT(v_chan, val_table)))
    cv2.cvtColor(jittered, cv2.COLOR_HSV2BGR, dst=im)  # dst=im writes into the caller's array, no return needed
27 |
28 |
29 |
def hist_equalize(im, clahe=True, bgr=False):
    """Equalize the luminance histogram of a 3-channel image with values in 0-255.

    The image is converted to YUV, only the Y channel is equalized, and the
    result is converted back to the input colour order.

    Args:
        im: image of shape (n, m, 3); RGB by default, BGR when ``bgr`` is true.
        clahe: if true use CLAHE (clipLimit=2.0, 8x8 tiles); otherwise use
            plain global histogram equalization.
        bgr: channel order of ``im`` (and of the returned image).

    Returns:
        A new equalized image in the same channel order as the input.
    """
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        # Only one of the two methods may run: applying global equalization on
        # top of CLAHE would discard the locally adaptive result.
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # back to the input colour order
38 |
39 |
40 |
def replicate(im, labels):
    """Copy the smaller half of the labelled boxes to random image positions.

    Args:
        im: image array; it is modified in place.
        labels: array whose rows are [cls, x1, y1, x2, y2] in pixels.

    Returns:
        (im, labels): the same image object and the labels with one extra
        row per pasted copy (the input array is returned unchanged when
        nothing is pasted).
    """
    h, w = im.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)

    pasted = []  # new label rows, appended in one go after the loop
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        # Random top-left corner such that the copy fits inside the image (y is drawn first, then x)
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))
        x1a, y1a, x2a, y2a = xc, yc, xc + bw, yc + bh
        im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]
        pasted.append([labels[i, 0], x1a, y1a, x2a, y2a])

    if pasted:
        # A single append avoids re-copying the whole label array per pasted box
        labels = np.append(labels, pasted, axis=0)

    return im, labels
55 |
56 |
57 |
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        im: input image; shape[:2] is (height, width).
        new_shape: target (height, width); a single int means a square target.
        color: constant border colour used for the padding.
        auto: pad only up to the next multiple of ``stride`` (minimum rectangle).
        scaleFill: when ``auto`` is false, stretch to ``new_shape`` with no padding.
        scaleup: allow enlarging; if false the image is only ever scaled down.
        stride: modulus used for the ``auto`` padding.

    Returns:
        (im, ratio, (dw, dh)): the padded image, the (width, height) scale
        ratios, and the per-side padding in pixels (may be fractional).
    """
    src_h, src_w = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old): the tighter of the two axes decides
    gain = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        gain = min(gain, 1.0)

    # Compute padding
    ratio = gain, gain  # width, height ratios
    new_unpad = int(round(src_w * gain)), int(round(src_h * gain))  # (width, height) after resize
    dw = dst_w - new_unpad[0]  # total width padding
    dh = dst_h - new_unpad[1]  # total height padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = dst_w / src_w, dst_h / src_h  # width, height ratios

    # Divide padding into 2 sides
    dw /= 2
    dh /= 2

    if (src_w, src_h) != new_unpad:  # resize only when the size actually changes
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send the odd pixel of an x.5 split to the bottom/right side
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
88 |
89 |
90 |
def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    """Apply one random perspective/affine warp to an image and its labels.

    With the default arguments this is comparable to
    torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)).

    Args:
        im: image array; shape[0] is height and shape[1] is width.
        targets: label rows laid out as [cls, x1, y1, x2, y2]; empty means no labels.
        segments: per-label point arrays; if any of them holds a non-zero
            value, boxes are rebuilt from the warped segments instead of
            from the warped box corners.
        degrees: rotation angle is drawn uniformly from [-degrees, degrees].
        translate: translation is drawn from [0.5 - translate, 0.5 + translate] times the output size.
        scale: scale factor is drawn uniformly from [1 - scale, 1 + scale].
        shear: x and y shear angles are each drawn from [-shear, shear] degrees.
        perspective: bound of the two perspective terms; a falsy value selects an affine warp.
        border: (y, x) amounts added twice to the output height and width.

    Returns:
        (im, targets): the warped image and the surviving labels with
        updated box coordinates.
    """
    src_h, src_w = im.shape[0], im.shape[1]  # shape(h,w,c)
    height = src_h + border[0] * 2  # output canvas height
    width = src_w + border[1] * 2  # output canvas width

    # Center: shift the image centre to the origin
    C = np.eye(3)
    C[0, 2] = -src_w / 2  # x translation (pixels)
    C[1, 2] = -src_h / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    angle = random.uniform(-degrees, degrees)
    s = random.uniform(1 - scale, 1 + scale)
    R = np.eye(3)
    R[:2] = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined matrix: order of operations (right to left) is IMPORTANT
    M = T @ S @ R @ P @ C
    image_changed = (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any()
    if image_changed:
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine: only the top two rows of M are needed
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if not n:
        return im, targets

    use_segments = any(x.any() for x in segments)
    if use_segments:  # warp segments
        new = np.zeros((n, 4))
        segments = resample_segments(segments)  # upsample (helper defined in utils.general)
        for k, seg in enumerate(segments):
            pts = np.ones((len(seg), 3))  # homogeneous coordinates
            pts[:, :2] = seg
            pts = pts @ M.T  # transform
            pts = pts[:, :2] / pts[:, 2:3] if perspective else pts[:, :2]  # perspective rescale or affine

            # clip to the output canvas and reduce to a box
            new[k] = segment2box(pts, width, height)
    else:  # warp boxes
        corners = np.ones((n * 4, 3))  # four homogeneous corners per box
        corners[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        corners = corners @ M.T  # transform
        if perspective:
            corners = corners[:, :2] / corners[:, 2:3]  # perspective rescale
        else:
            corners = corners[:, :2]
        corners = corners.reshape(n, 8)

        # create new axis-aligned boxes enclosing the warped corners
        xs = corners[:, [0, 2, 4, 6]]
        ys = corners[:, [1, 3, 5, 7]]
        new = np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1))).reshape(4, n).T

        # clip
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

    # filter candidates: original boxes are scaled by s before comparison with the warped ones
    keep = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
    targets = targets[keep]
    targets[:, 1:5] = new[keep]

    return im, targets
177 |
178 |
179 |
def copy_paste(im, labels, segments, probability=0.5):
    """Copy-Paste augmentation: paste horizontally mirrored objects into the image.

    Args:
        im: image array; it is modified in place.
        labels: nx5 np.array of rows [cls, x1, y1, x2, y2].
        segments: list of per-label point arrays with x in column 0 and y in
            column 1; mirrored copies are appended to this list in place.
        probability: fraction of the ``n`` segments sampled as paste candidates.

    Returns:
        (im, labels, segments) including the pasted objects.
    """
    n = len(segments)
    if probability and n:
        w = im.shape[1]  # image width, used to mirror x coordinates
        im_new = np.zeros(im.shape, np.uint8)  # mask of the objects selected for pasting
        for j in random.sample(range(n), k=round(probability * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]  # box mirrored left-right
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        # Select pixels from the flipped mask itself rather than from "masked image > 0":
        # the latter skipped zero-valued pixels/channels inside an object, letting the
        # background show through the pasted copy.
        result = cv2.flip(im, 1)  # augment segments (flip left-right)
        i = cv2.flip(im_new, 1).astype(bool)  # pixels to replace
        im[i] = result[i]

    return im, labels, segments
201 |
202 |
203 |
def cutout(im, labels):
    """Apply cutout augmentation: paint random solid-colour rectangles on the image.

    Args:
        im: image array with 3 channels; it is modified in place.
        labels: array of rows [cls, x1, y1, x2, y2]; may be empty.

    Returns:
        The labels that remain less than 60% covered by every mask.
    """
    h, w = im.shape[:2]

    # create random masks: 1 at 1/2 size, 2 at 1/4, 4 at 1/8, 8 at 1/16 and 16 at 1/32
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        # Clamp the upper bound to 1: for small images int(h * s) is 0 and
        # random.randint(1, 0) would raise ValueError.
        mask_h = random.randint(1, max(1, int(h * s)))
        mask_w = random.randint(1, max(1, int(w * s)))

        # box, clipped to the image bounds
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels (every scale listed above exceeds 0.03, so this runs for each mask)
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels
229 |
230 |
231 |
def mixup(im, labels, im2, labels2, alpha=32.0):
    """Apply MixUp augmentation: blend two images and merge their labels.

    Args:
        im: first image.
        labels: label rows belonging to ``im``.
        im2: second image; must broadcast against ``im``.
        labels2: label rows belonging to ``im2``.
        alpha: both shape parameters of the Beta distribution the blend
            ratio is drawn from. The default 32.0 keeps the previous
            hard-coded behaviour.

    Returns:
        (im, labels): the blended uint8 image and ``labels`` followed by ``labels2``.
    """
    r = np.random.beta(alpha, alpha)  # mixup ratio, alpha=beta
    im = (im * r + im2 * (1 - r)).astype(np.uint8)
    labels = np.concatenate((labels, labels2), 0)
    return im, labels
237 |
238 |
239 |
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    """Select the boxes that are still usable after augmentation.

    Args:
        box1: boxes before augmentation, shape (4, n) with rows x1, y1, x2, y2.
        box2: boxes after augmentation, same layout.
        wh_thr: minimum width and height (pixels) of the augmented box.
        ar_thr: maximum aspect ratio of the augmented box.
        area_thr: minimum ratio of augmented area to original area.
        eps: small constant that guards the divisions against zero.

    Returns:
        Boolean array of length n, true where the augmented box passes all checks.
    """
    # Accept nested lists/tuples as well as arrays; a no-op for ndarray input
    box1, box2 = np.asarray(box1), np.asarray(box2)
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
@@ -1,4 +1,4 @@
1 |
2 |
3 |
import glob
4 |
import hashlib
@@ -14,7 +14,6 @@ from pathlib import Path
14 |
from threading import Thread
15 |
16 |
import cv2
17 |
import math
18 |
import numpy as np
19 |
import torch
20 |
import torch.nn.functional as F
@@ -23,9 +22,9 @@ from PIL import Image, ExifTags
23 |
from import Dataset
24 |
from tqdm import tqdm
25 |
26 |
from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \
27 |
28 |
from utils.metrics import bbox_ioa
29 |
from utils.torch_utils import torch_distributed_zero_first
30 |
31 |
# Parameters
@@ -523,12 +522,10 @@ class LoadImagesAndLabels(Dataset): # for training/testing
523 |
img, labels = load_mosaic(self, index)
524 |
shapes = None
525 |
526 |
# MixUp
527 |
if random.random() < hyp['mixup']:
528 |
529 |
530 |
img = (img * r + img2 * (1 - r)).astype(np.uint8)
531 |
labels = np.concatenate((labels, labels2), 0)
532 |
533 |
534 |
# Load image
@@ -639,32 +636,6 @@ def load_image(self, index):
639 |
return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
640 |
641 |
642 |
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly jitter the hue, saturation and value of a BGR image, in place.

    Args:
        img: BGR image (converted with cv2.COLOR_BGR2HSV); it is overwritten with the result.
        hgain: maximum fractional change of the hue gain.
        sgain: maximum fractional change of the saturation gain.
        vgain: maximum fractional change of the value gain.

    Returns:
        None. The augmented image is written back into ``img``.
    """
    if not (hgain or sgain or vgain):
        return  # every gain is disabled, so the image is left untouched

    # One multiplicative gain per channel, each drawn uniformly from [1 - gain, 1 + gain]
    gains = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1
    h_chan, s_chan, v_chan = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    out_dtype = img.dtype  # uint8

    # Lookup tables covering every possible 8-bit channel value
    levels = np.arange(0, 256, dtype=gains.dtype)
    hue_table = ((levels * gains[0]) % 180).astype(out_dtype)  # hue wraps around at 180
    sat_table = np.clip(levels * gains[1], 0, 255).astype(out_dtype)
    val_table = np.clip(levels * gains[2], 0, 255).astype(out_dtype)

    jittered = cv2.merge((cv2.LUT(h_chan, hue_table), cv2.LUT(s_chan, sat_table), cv2.LUT(v_chan, val_table)))
    cv2.cvtColor(jittered, cv2.COLOR_HSV2BGR, dst=img)  # dst=img writes into the caller's array, no return needed
655 |
656 |
657 |
def hist_equalize(img, clahe=True, bgr=False):
    """Equalize the luminance histogram of a 3-channel image with values in 0-255.

    The image is converted to YUV, only the Y channel is equalized, and the
    result is converted back to the input colour order.

    Args:
        img: image of shape (n, m, 3); RGB by default, BGR when ``bgr`` is true.
        clahe: if true use CLAHE (clipLimit=2.0, 8x8 tiles); otherwise use
            plain global histogram equalization.
        bgr: channel order of ``img`` (and of the returned image).

    Returns:
        A new equalized image in the same channel order as the input.
    """
    yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        # Only one of the two methods may run: applying global equalization on
        # top of CLAHE would discard the locally adaptive result.
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # back to the input colour order
666 |
667 |
668 |
def load_mosaic(self, index):
669 |
# loads images in a 4-mosaic
670 |
@@ -796,205 +767,6 @@ def load_mosaic9(self, index):
796 |
return img9, labels9
797 |
798 |
799 |
def replicate(img, labels):
    """Copy the smaller half of the labelled boxes to random image positions.

    Args:
        img: image array; it is modified in place.
        labels: array whose rows are [cls, x1, y1, x2, y2] in pixels.

    Returns:
        (img, labels): the same image object and the labels with one extra
        row per pasted copy (the input array is returned unchanged when
        nothing is pasted).
    """
    h, w = img.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)

    pasted = []  # new label rows, appended in one go after the loop
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        # Random top-left corner such that the copy fits inside the image (y is drawn first, then x)
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))
        x1a, y1a, x2a, y2a = xc, yc, xc + bw, yc + bh
        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]
        pasted.append([labels[i, 0], x1a, y1a, x2a, y2a])

    if pasted:
        # A single append avoids re-copying the whole label array per pasted box
        labels = np.append(labels, pasted, axis=0)

    return img, labels
814 |
815 |
816 |
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        img: input image; shape[:2] is (height, width).
        new_shape: target (height, width); a single int means a square target.
        color: constant border colour used for the padding.
        auto: pad only up to the next multiple of ``stride`` (minimum rectangle).
        scaleFill: when ``auto`` is false, stretch to ``new_shape`` with no padding.
        scaleup: allow enlarging; if false the image is only ever scaled down.
        stride: modulus used for the ``auto`` padding.

    Returns:
        (img, ratio, (dw, dh)): the padded image, the (width, height) scale
        ratios, and the per-side padding in pixels (may be fractional).
    """
    src_h, src_w = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old): the tighter of the two axes decides
    gain = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        gain = min(gain, 1.0)

    # Compute padding
    ratio = gain, gain  # width, height ratios
    new_unpad = int(round(src_w * gain)), int(round(src_h * gain))  # (width, height) after resize
    dw = dst_w - new_unpad[0]  # total width padding
    dh = dst_h - new_unpad[1]  # total height padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = dst_w / src_w, dst_h / src_h  # width, height ratios

    # Divide padding into 2 sides
    dw /= 2
    dh /= 2

    if (src_w, src_h) != new_unpad:  # resize only when the size actually changes
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send the odd pixel of an x.5 split to the bottom/right side
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
847 |
848 |
849 |
def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    """Apply one random perspective/affine warp to an image and its labels.

    With the default arguments this is comparable to
    torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)).

    Args:
        img: image array; shape[0] is height and shape[1] is width.
        targets: label rows laid out as [cls, x1, y1, x2, y2]; empty means no labels.
        segments: per-label point arrays; if any of them holds a non-zero
            value, boxes are rebuilt from the warped segments instead of
            from the warped box corners.
        degrees: rotation angle is drawn uniformly from [-degrees, degrees].
        translate: translation is drawn from [0.5 - translate, 0.5 + translate] times the output size.
        scale: scale factor is drawn uniformly from [1 - scale, 1 + scale].
        shear: x and y shear angles are each drawn from [-shear, shear] degrees.
        perspective: bound of the two perspective terms; a falsy value selects an affine warp.
        border: (y, x) amounts added twice to the output height and width.

    Returns:
        (img, targets): the warped image and the surviving labels with
        updated box coordinates.
    """
    src_h, src_w = img.shape[0], img.shape[1]  # shape(h,w,c)
    height = src_h + border[0] * 2  # output canvas height
    width = src_w + border[1] * 2  # output canvas width

    # Center: shift the image centre to the origin
    C = np.eye(3)
    C[0, 2] = -src_w / 2  # x translation (pixels)
    C[1, 2] = -src_h / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    angle = random.uniform(-degrees, degrees)
    s = random.uniform(1 - scale, 1 + scale)
    R = np.eye(3)
    R[:2] = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined matrix: order of operations (right to left) is IMPORTANT
    M = T @ S @ R @ P @ C
    image_changed = (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any()
    if image_changed:
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine: only the top two rows of M are needed
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if not n:
        return img, targets

    use_segments = any(x.any() for x in segments)
    if use_segments:  # warp segments
        new = np.zeros((n, 4))
        segments = resample_segments(segments)  # upsample (helper defined in utils.general)
        for k, seg in enumerate(segments):
            pts = np.ones((len(seg), 3))  # homogeneous coordinates
            pts[:, :2] = seg
            pts = pts @ M.T  # transform
            pts = pts[:, :2] / pts[:, 2:3] if perspective else pts[:, :2]  # perspective rescale or affine

            # clip to the output canvas and reduce to a box
            new[k] = segment2box(pts, width, height)
    else:  # warp boxes
        corners = np.ones((n * 4, 3))  # four homogeneous corners per box
        corners[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        corners = corners @ M.T  # transform
        if perspective:
            corners = corners[:, :2] / corners[:, 2:3]  # perspective rescale
        else:
            corners = corners[:, :2]
        corners = corners.reshape(n, 8)

        # create new axis-aligned boxes enclosing the warped corners
        xs = corners[:, [0, 2, 4, 6]]
        ys = corners[:, [1, 3, 5, 7]]
        new = np.concatenate((xs.min(1), ys.min(1), xs.max(1), ys.max(1))).reshape(4, n).T

        # clip
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

    # filter candidates: original boxes are scaled by s before comparison with the warped ones
    keep = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
    targets = targets[keep]
    targets[:, 1:5] = new[keep]

    return img, targets
936 |
937 |
938 |
def copy_paste(img, labels, segments, probability=0.5):
    """Copy-Paste augmentation: paste horizontally mirrored objects into the image.

    Args:
        img: image array; it is modified in place.
        labels: nx5 np.array of rows [cls, x1, y1, x2, y2].
        segments: list of per-label point arrays with x in column 0 and y in
            column 1; mirrored copies are appended to this list in place.
        probability: fraction of the ``n`` segments sampled as paste candidates.

    Returns:
        (img, labels, segments) including the pasted objects.
    """
    n = len(segments)
    if probability and n:
        w = img.shape[1]  # image width, used to mirror x coordinates
        im_new = np.zeros(img.shape, np.uint8)  # mask of the objects selected for pasting
        for j in random.sample(range(n), k=round(probability * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]  # box mirrored left-right
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        # Select pixels from the flipped mask itself rather than from "masked image > 0":
        # the latter skipped zero-valued pixels/channels inside an object, letting the
        # background show through the pasted copy.
        result = cv2.flip(img, 1)  # augment segments (flip left-right)
        i = cv2.flip(im_new, 1).astype(bool)  # pixels to replace
        img[i] = result[i]

    return img, labels, segments
960 |
961 |
962 |
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    """Select the boxes that are still usable after augmentation.

    Args:
        box1: boxes before augmentation, shape (4, n) with rows x1, y1, x2, y2.
        box2: boxes after augmentation, same layout.
        wh_thr: minimum width and height (pixels) of the augmented box.
        ar_thr: maximum aspect ratio of the augmented box.
        area_thr: minimum ratio of augmented area to original area.
        eps: small constant that guards the divisions against zero.

    Returns:
        Boolean array of length n, true where the augmented box passes all checks.
    """
    # Accept nested lists/tuples as well as arrays; a no-op for ndarray input
    box1, box2 = np.asarray(box1), np.asarray(box2)
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
968 |
969 |
970 |
def cutout(image, labels):
    """Apply cutout augmentation: paint random solid-colour rectangles on the image.

    Args:
        image: image array with 3 channels; it is modified in place.
        labels: array of rows [cls, x1, y1, x2, y2]; may be empty.

    Returns:
        The labels that remain less than 60% covered by every mask.
    """
    h, w = image.shape[:2]

    # create random masks: 1 at 1/2 size, 2 at 1/4, 4 at 1/8, 8 at 1/16 and 16 at 1/32
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        # Clamp the upper bound to 1: for small images int(h * s) is 0 and
        # random.randint(1, 0) would raise ValueError.
        mask_h = random.randint(1, max(1, int(h * s)))
        mask_w = random.randint(1, max(1, int(w * s)))

        # box, clipped to the image bounds
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels (every scale listed above exceeds 0.03, so this runs for each mask)
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels
996 |
997 |
998 |
def create_folder(path='./new'):
999 |
# Create folder
1000 |
if os.path.exists(path):
@@ -1012,7 +784,6 @@ def flatten_recursive(path='../datasets/coco128'):
1012 |
1013 |
def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes()
1014 |
# Convert detection dataset into classification dataset, with one directory per class
1015 |
1016 |
path = Path(path) # images dir
1017 |
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
1018 |
files = list(path.rglob('*.*'))
1 |
# YOLOv5 dataset utils and dataloaders
2 |
3 |
import glob
4 |
import hashlib
14 |
from threading import Thread
15 |
16 |
import cv2
17 |
import numpy as np
18 |
import torch
19 |
import torch.nn.functional as F
22 |
from import Dataset
23 |
from tqdm import tqdm
24 |
25 |
from utils.augmentations import augment_hsv, copy_paste, letterbox, mixup, random_perspective
26 |
from utils.general import check_requirements, check_file, check_dataset, xywh2xyxy, xywhn2xyxy, xyxy2xywhn, \
27 |
xyn2xy, segments2boxes, clean_str
28 |
from utils.torch_utils import torch_distributed_zero_first
29 |
30 |
# Parameters
522 |
img, labels = load_mosaic(self, index)
523 |
shapes = None
524 |
525 |
# MixUp augmentation
526 |
if random.random() < hyp['mixup']:
527 |
img, labels = mixup(img, labels, *load_mosaic(self, random.randint(0, self.n - 1)))
528 |
529 |
530 |
531 |
# Load image
636 |
return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
637 |
638 |
639 |
def load_mosaic(self, index):
640 |
# loads images in a 4-mosaic
641 |
767 |
return img9, labels9
768 |
769 |
770 |
def create_folder(path='./new'):
771 |
# Create folder
772 |
if os.path.exists(path):
784 |
785 |
def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes()
786 |
# Convert detection dataset into classification dataset, with one directory per class
787 |
path = Path(path) # images dir
788 |
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
789 |
files = list(path.rglob('*.*'))