""" | |
Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets | |
""" | |
import config
import numpy as np
import os
import pandas as pd
import torch

from PIL import Image, ImageFile
from torch.utils.data import Dataset, DataLoader
from utils import (
    cells_to_bboxes,
    iou_width_height as iou,
    non_max_suppression as nms,
    plot_image,
)

ImageFile.LOAD_TRUNCATED_IMAGES = True  # tolerate truncated image files instead of raising
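
# NOTE (assumption): the annotations CSV is expected to contain two columns per
# row, where column 0 is the image filename and column 1 the label filename,
# since __getitem__ reads iloc[index, 0] and iloc[index, 1].
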
class YOLODataset(Dataset):
    def __init__(
        self,
        csv_file,
        img_dir,
        label_dir,
        anchors,
        image_size=416,
        S=[13, 26, 52],
        C=20,
        transform=None,
    ):
        self.annotations = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.image_size = image_size
        self.transform = transform
        self.S = S
        self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2])  # for all 3 scales
        self.num_anchors = self.anchors.shape[0]
        self.num_anchors_per_scale = self.num_anchors // 3
        self.C = C
        self.ignore_iou_thresh = 0.5
    def __len__(self):
        return len(self.annotations)
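
    # __getitem__ returns the image plus one target tensor per scale, shaped
    # (num_anchors_per_scale, S, S, 6); the last dimension is
    # [objectness, x_cell, y_cell, width_cell, height_cell, class_label],
    # where objectness is 1 for the assigned anchor, 0 for background and
    # -1 for anchors the loss should ignore.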
    def __getitem__(self, index):
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
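        # Label files are assumed to use the standard YOLO txt format, one box
        # per line: [class_label, x, y, width, height], with coordinates
        # relative to image size. np.roll moves class_label to the end, giving
        # [x, y, w, h, class] as albumentations' "yolo" bbox format expects.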
        bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
        img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
        image = np.array(Image.open(img_path).convert("RGB"))
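        # NOTE (assumption): `transform` is an albumentations Compose created
        # with bbox_params(format="yolo"), so the boxes are transformed
        # together with the image.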
        if self.transform:
            augmentations = self.transform(image=image, bboxes=bboxes)
            image = augmentations["image"]
            bboxes = augmentations["bboxes"]
        # Below assumes 3 scale predictions (as in the paper) and the same
        # number of anchors per scale
        targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
        for box in bboxes:
            iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
            x, y, width, height, class_label = box
            has_anchor = [False] * 3  # each scale should get exactly one anchor per box
            for anchor_idx in anchor_indices:
                # anchors are stored scale-major, so integer division/modulo
                # recover the scale and the anchor index within that scale
                scale_idx = anchor_idx // self.num_anchors_per_scale
                anchor_on_scale = anchor_idx % self.num_anchors_per_scale
                S = self.S[scale_idx]
                i, j = int(S * y), int(S * x)  # which cell (row i, column j)
                anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
                if not anchor_taken and not has_anchor[scale_idx]:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = 1
                    x_cell, y_cell = S * x - j, S * y - i  # both in [0,1]
                    width_cell, height_cell = (
                        width * S,
                        height * S,
                    )  # can be greater than 1 since it's relative to cell
                    box_coordinates = torch.tensor(
                        [x_cell, y_cell, width_cell, height_cell]
                    )
                    targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
                    targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
                    has_anchor[scale_idx] = True
                elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore this prediction
        return image, tuple(targets)
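

# Minimal usage sketch (a hypothetical example; paths mirror test() below and
# assume config.ANCHORS holds 3 (w, h) pairs per scale):
#
#     dataset = YOLODataset(
#         "COCO/train.csv",
#         "COCO/images/images/",
#         "COCO/labels/labels_new/",
#         anchors=config.ANCHORS,
#         transform=config.test_transforms,
#     )
#     image, (t13, t26, t52) = dataset[0]
#     # t13.shape == (3, 13, 13, 6), t26.shape == (3, 26, 26, 6), ...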
def test():
    anchors = config.ANCHORS
    transform = config.test_transforms

    dataset = YOLODataset(
        "COCO/train.csv",
        "COCO/images/images/",
        "COCO/labels/labels_new/",
        S=[13, 26, 52],
        anchors=anchors,
        transform=transform,
    )
    S = [13, 26, 52]
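    # Rescale the (w, h) anchors from image-relative units to cell-relative
    # units by multiplying each scale's anchors by its grid size S; this
    # matches the width_cell/height_cell convention used in the targets.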
    scaled_anchors = torch.tensor(anchors) * (
        torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    )
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

    for x, y in loader:
        boxes = []
        # y[0].shape[1] is the number of anchors per scale (3), which here
        # also equals the number of scales, so this loops over the 3 scales
        for i in range(y[0].shape[1]):
            anchor = scaled_anchors[i]
            print(anchor.shape)
            print(y[i].shape)
            boxes += cells_to_bboxes(
                y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
            )[0]
        # iou_threshold=1 effectively disables suppression; we only want to
        # collect the ground-truth boxes for plotting
        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)


if __name__ == "__main__":
    test()