glenn-jocher committed on
Commit
9c513ca
1 Parent(s): f40854b

Add `DATASETS_DIR` global in general.py (#6578)

Browse files
Files changed (2) hide show
  1. utils/datasets.py +6 -6
  2. utils/general.py +3 -2
utils/datasets.py CHANGED
@@ -27,7 +27,7 @@ from torch.utils.data import DataLoader, Dataset, dataloader, distributed
27
  from tqdm import tqdm
28
 
29
  from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
30
- from utils.general import (LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
31
  segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
32
  from utils.torch_utils import torch_distributed_zero_first
33
 
@@ -817,15 +817,15 @@ def create_folder(path='./new'):
817
  os.makedirs(path) # make new output folder
818
 
819
 
820
- def flatten_recursive(path='../datasets/coco128'):
821
  # Flatten a recursive directory by bringing all files to top level
822
- new_path = Path(path + '_flat')
823
  create_folder(new_path)
824
  for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
825
  shutil.copyfile(file, new_path / Path(file).name)
826
 
827
 
828
- def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes()
829
  # Convert detection dataset into classification dataset, with one directory per class
830
  path = Path(path) # images dir
831
  shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
@@ -859,7 +859,7 @@ def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *;
859
  assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
860
 
861
 
862
- def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
863
  """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
864
  Usage: from utils.datasets import *; autosplit()
865
  Arguments
@@ -939,7 +939,7 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
939
  """ Return dataset statistics dictionary with images and instances counts per split per class
940
  To run in parent directory: export PYTHONPATH="$PWD/yolov5"
941
  Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
942
- Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip')
943
  Arguments
944
  path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
945
  autodownload: Attempt to download dataset if not found locally
 
27
  from tqdm import tqdm
28
 
29
  from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
30
+ from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
31
  segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
32
  from utils.torch_utils import torch_distributed_zero_first
33
 
 
817
  os.makedirs(path) # make new output folder
818
 
819
 
820
+ def flatten_recursive(path=DATASETS_DIR / 'coco128'):
821
  # Flatten a recursive directory by bringing all files to top level
822
+ new_path = Path(str(path) + '_flat')
823
  create_folder(new_path)
824
  for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
825
  shutil.copyfile(file, new_path / Path(file).name)
826
 
827
 
828
+ def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.datasets import *; extract_boxes()
829
  # Convert detection dataset into classification dataset, with one directory per class
830
  path = Path(path) # images dir
831
  shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
 
859
  assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
860
 
861
 
862
+ def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
863
  """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
864
  Usage: from utils.datasets import *; autosplit()
865
  Arguments
 
939
  """ Return dataset statistics dictionary with images and instances counts per split per class
940
  To run in parent directory: export PYTHONPATH="$PWD/yolov5"
941
  Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
942
+ Usage2: from utils.datasets import *; dataset_stats('path/to/coco128_with_yaml.zip')
943
  Arguments
944
  path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
945
  autodownload: Attempt to download dataset if not found locally
utils/general.py CHANGED
@@ -35,6 +35,7 @@ from utils.metrics import box_iou, fitness
35
  # Settings
36
  FILE = Path(__file__).resolve()
37
  ROOT = FILE.parents[1] # YOLOv5 root directory
 
38
  NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
39
  VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
40
  FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
@@ -398,8 +399,8 @@ def check_dataset(data, autodownload=True):
398
  # Download (optional)
399
  extract_dir = ''
400
  if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
401
- download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1)
402
- data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml'))
403
  extract_dir, autodownload = data.parent, False
404
 
405
  # Read yaml (optional)
 
35
  # Settings
36
  FILE = Path(__file__).resolve()
37
  ROOT = FILE.parents[1] # YOLOv5 root directory
38
+ DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory
39
  NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
40
  VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
41
  FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
 
399
  # Download (optional)
400
  extract_dir = ''
401
  if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
402
+ download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1)
403
+ data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
404
  extract_dir, autodownload = data.parent, False
405
 
406
  # Read yaml (optional)