glenn-jocher
committed on
Commit
•
9c513ca
1
Parent(s):
f40854b
Add `DATASETS_DIR` global in general.py (#6578)
Browse files
- utils/datasets.py +6 -6
- utils/general.py +3 -2
utils/datasets.py
CHANGED
@@ -27,7 +27,7 @@ from torch.utils.data import DataLoader, Dataset, dataloader, distributed
|
|
27 |
from tqdm import tqdm
|
28 |
|
29 |
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
|
30 |
-
from utils.general import (LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
|
31 |
segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
|
32 |
from utils.torch_utils import torch_distributed_zero_first
|
33 |
|
@@ -817,15 +817,15 @@ def create_folder(path='./new'):
|
|
817 |
os.makedirs(path) # make new output folder
|
818 |
|
819 |
|
820 |
-
def flatten_recursive(path='../datasets/coco128'):
|
821 |
# Flatten a recursive directory by bringing all files to top level
|
822 |
-
new_path = Path(path + '_flat')
|
823 |
create_folder(new_path)
|
824 |
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
|
825 |
shutil.copyfile(file, new_path / Path(file).name)
|
826 |
|
827 |
|
828 |
-
def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes()
|
829 |
# Convert detection dataset into classification dataset, with one directory per class
|
830 |
path = Path(path) # images dir
|
831 |
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
|
@@ -859,7 +859,7 @@ def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *;
|
|
859 |
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
|
860 |
|
861 |
|
862 |
-
def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
|
863 |
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
|
864 |
Usage: from utils.datasets import *; autosplit()
|
865 |
Arguments
|
@@ -939,7 +939,7 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
|
|
939 |
""" Return dataset statistics dictionary with images and instances counts per split per class
|
940 |
To run in parent directory: export PYTHONPATH="$PWD/yolov5"
|
941 |
Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
|
942 |
-
Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
|
943 |
Arguments
|
944 |
path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
|
945 |
autodownload: Attempt to download dataset if not found locally
|
|
|
27 |
from tqdm import tqdm
|
28 |
|
29 |
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
|
30 |
+
from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
|
31 |
segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
|
32 |
from utils.torch_utils import torch_distributed_zero_first
|
33 |
|
|
|
817 |
os.makedirs(path) # make new output folder
|
818 |
|
819 |
|
820 |
+
def flatten_recursive(path=DATASETS_DIR / 'coco128'):
|
821 |
# Flatten a recursive directory by bringing all files to top level
|
822 |
+
new_path = Path(str(path) + '_flat')
|
823 |
create_folder(new_path)
|
824 |
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
|
825 |
shutil.copyfile(file, new_path / Path(file).name)
|
826 |
|
827 |
|
828 |
+
def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.datasets import *; extract_boxes()
|
829 |
# Convert detection dataset into classification dataset, with one directory per class
|
830 |
path = Path(path) # images dir
|
831 |
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
|
|
|
859 |
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
|
860 |
|
861 |
|
862 |
+
def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
|
863 |
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
|
864 |
Usage: from utils.datasets import *; autosplit()
|
865 |
Arguments
|
|
|
939 |
""" Return dataset statistics dictionary with images and instances counts per split per class
|
940 |
To run in parent directory: export PYTHONPATH="$PWD/yolov5"
|
941 |
Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
|
942 |
+
Usage2: from utils.datasets import *; dataset_stats('path/to/coco128_with_yaml.zip')
|
943 |
Arguments
|
944 |
path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
|
945 |
autodownload: Attempt to download dataset if not found locally
|
utils/general.py
CHANGED
@@ -35,6 +35,7 @@ from utils.metrics import box_iou, fitness
|
|
35 |
# Settings
|
36 |
FILE = Path(__file__).resolve()
|
37 |
ROOT = FILE.parents[1] # YOLOv5 root directory
|
|
|
38 |
NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
|
39 |
VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
|
40 |
FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
|
@@ -398,8 +399,8 @@ def check_dataset(data, autodownload=True):
|
|
398 |
# Download (optional)
|
399 |
extract_dir = ''
|
400 |
if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
|
401 |
-
download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1)
|
402 |
-
data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml'))
|
403 |
extract_dir, autodownload = data.parent, False
|
404 |
|
405 |
# Read yaml (optional)
|
|
|
35 |
# Settings
|
36 |
FILE = Path(__file__).resolve()
|
37 |
ROOT = FILE.parents[1] # YOLOv5 root directory
|
38 |
+
DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory
|
39 |
NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
|
40 |
VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
|
41 |
FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
|
|
|
399 |
# Download (optional)
|
400 |
extract_dir = ''
|
401 |
if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
|
402 |
+
download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1)
|
403 |
+
data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
|
404 |
extract_dir, autodownload = data.parent, False
|
405 |
|
406 |
# Read yaml (optional)
|