glenn-jocher committed
Commit c3ae4e4 • 1 Parent(s): ecc2c7b

Multi-threaded VisDrone and VOC downloads (#7108)


* Multi-threaded VOC download

* Update VOC.yaml

* Update

* Update general.py

* Update general.py
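
The functional change here is that the VOC and VisDrone download scripts now pass a threads= argument to utils.general.download(), so the release zips are fetched concurrently rather than one at a time. As rough orientation only, the sketch below shows how a download helper can fan multiple URLs out over a thread pool when threads > 1; the structure and names are illustrative assumptions, not a verbatim copy of the YOLOv5 implementation (only part of which appears in this diff).

    # Minimal sketch of a multi-threaded download helper (assumed structure, for illustration only)
    from itertools import repeat
    from multiprocessing.pool import ThreadPool
    from pathlib import Path

    import torch

    def download(url, dir='.', threads=1):
        def download_one(url, dir):
            f = Path(dir) / Path(url).name  # destination file
            torch.hub.download_url_to_file(url, f, progress=threads == 1)  # hide bars when threaded

        dir = Path(dir)
        dir.mkdir(parents=True, exist_ok=True)  # make target directory
        if threads > 1:
            pool = ThreadPool(threads)
            pool.imap(lambda x: download_one(*x), zip(url, repeat(dir)))  # concurrent downloads
            pool.close()
            pool.join()  # wait for all downloads to finish
        else:
            for u in [url] if isinstance(url, str) else url:
                download_one(u, dir)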

data/GlobalWheat2020.yaml CHANGED
@@ -34,6 +34,7 @@ names: ['wheat_head'] # class names
 download: |
   from utils.general import download, Path
 
+
   # Download
   dir = Path(yaml['path'])  # dataset root dir
   urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
data/Objects365.yaml CHANGED
@@ -65,6 +65,7 @@ download: |
 
   from utils.general import Path, download, np, xyxy2xywhn
 
+
   # Make Directories
   dir = Path(yaml['path'])  # dataset root dir
   for p in 'images', 'labels':
data/SKU-110K.yaml CHANGED
@@ -24,6 +24,7 @@ download: |
   from tqdm import tqdm
   from utils.general import np, pd, Path, download, xyxy2xywh
 
+
   # Download
   dir = Path(yaml['path'])  # dataset root dir
   parent = Path(dir.parent)  # download dir
data/VOC.yaml CHANGED
@@ -62,7 +62,7 @@ download: |
   urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
           url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
           url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
-  download(urls, dir=dir / 'images', delete=False)
+  download(urls, dir=dir / 'images', delete=False, threads=3)
 
   # Convert
   path = dir / f'images/VOCdevkit'
data/VisDrone.yaml CHANGED
@@ -54,7 +54,7 @@ download: |
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
           'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
-  download(urls, dir=dir)
+  download(urls, dir=dir, threads=4)
 
   # Convert
   for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
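
These download: blocks are not run directly; they execute via check_dataset() when the dataset paths are missing and autodownload is enabled. A hypothetical manual trigger from a YOLOv5 checkout could look like the lines below (this assumes check_dataset() accepts a dataset YAML path, which is how it is typically invoked from the training scripts):

    # Hypothetical manual trigger of the VOC autodownload (illustrative, not part of this diff)
    from utils.general import check_dataset

    data = check_dataset('data/VOC.yaml')  # runs the YAML's download: block, now with threads=3, if VOC is missing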
data/coco.yaml CHANGED
@@ -30,6 +30,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
 download: |
   from utils.general import download, Path
 
+
   # Download labels
   segments = False  # segment or box labels
   dir = Path(yaml['path'])  # dataset root dir
utils/general.py CHANGED
@@ -449,8 +449,9 @@ def check_dataset(data, autodownload=True):
     if val:
         val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
         if not all(x.exists() for x in val):
-            LOGGER.info('\nDataset not found, missing paths: %s' % [str(x) for x in val if not x.exists()])
+            LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
             if s and autodownload:  # download script
+                t = time.time()
                 root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
                 if s.startswith('http') and s.endswith('.zip'):  # URL
                     f = Path(s).name  # filename
@@ -465,9 +466,11 @@
                     r = os.system(s)
                 else:  # python script
                     r = exec(s, {'yaml': data})  # return None
-                LOGGER.info(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n")
+                dt = f'({round(time.time() - t, 1)}s)'
+                s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
+                LOGGER.info(emojis(f"Dataset download {s}"))
             else:
-                raise Exception('Dataset not found.')
+                raise Exception(emojis('Dataset not found ❌'))
 
     return data  # dictionary
 
@@ -491,7 +494,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
         if curl:
             os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -")  # curl download, retry and resume on fail
         else:
-            torch.hub.download_url_to_file(url, f, progress=True)  # torch download
+            torch.hub.download_url_to_file(url, f, progress=threads == 1)  # torch download
         if unzip and f.suffix in ('.zip', '.gz'):
             LOGGER.info(f'Unzipping {f}...')
             if f.suffix == '.zip':
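
Two behaviors change in utils/general.py: check_dataset() now times the autodownload and logs success or failure with the elapsed seconds, and download() only draws the per-file torch progress bar when a single thread is used, so concurrent workers do not interleave their console output. A standalone illustration of the new timing/status pattern (with the exec/os.system plumbing omitted and a placeholder path) is:

    # Illustration of the timing and status-string pattern added to check_dataset()
    import time

    t = time.time()
    r = 0  # pretend the download script returned success (0 or None)
    dt = f'({round(time.time() - t, 1)}s)'
    s = f"success ✅ {dt}, saved to ../datasets" if r in (0, None) else f"failure {dt} ❌"
    print(f"Dataset download {s}")  # check_dataset() wraps this in LOGGER.info(emojis(...))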