glenn-jocher
committed on
Commit
•
8c6f9e1
1
Parent(s):
850970e
Update `dataset_stats()` for zipped datasets (#3926)
Browse files* Update `dataset_stats()` for zipped datasets
@KalenMike
* cleanup
- utils/datasets.py +17 -3
utils/datasets.py
CHANGED
@@ -888,9 +888,11 @@ def verify_image_label(args):
|
|
888 |
|
889 |
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
|
890 |
""" Return dataset statistics dictionary with images and instances counts per split per class
|
891 |
-
|
|
|
|
|
892 |
Arguments
|
893 |
-
path: Path to data.yaml
|
894 |
autodownload: Attempt to download dataset if not found locally
|
895 |
verbose: Print stats dictionary
|
896 |
"""
|
@@ -899,8 +901,20 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
|
|
899 |
# Update labels to integer class and 6 decimal place floats
|
900 |
return [[int(c), *[round(x, 6) for x in points]] for c, *points in labels]
|
901 |
|
902 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
903 |
data = yaml.safe_load(f) # data dict
|
|
|
|
|
904 |
check_dataset(data, autodownload) # download dataset if missing
|
905 |
nc = data['nc'] # number of classes
|
906 |
stats = {'nc': nc, 'names': data['names']} # statistics dictionary
|
|
|
888 |
|
889 |
def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False):
|
890 |
""" Return dataset statistics dictionary with images and instances counts per split per class
|
891 |
+
Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', verbose=True)
|
892 |
+
Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128.zip', verbose=True)
|
893 |
+
|
894 |
Arguments
|
895 |
+
path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
|
896 |
autodownload: Attempt to download dataset if not found locally
|
897 |
verbose: Print stats dictionary
|
898 |
"""
|
|
|
901 |
# Update labels to integer class and 6 decimal place floats
|
902 |
return [[int(c), *[round(x, 6) for x in points]] for c, *points in labels]
|
903 |
|
904 |
+
def unzip(path):
    """Extract a zipped dataset (if given one) and locate its data.yaml.

    Arguments
        path: pathlib.Path to either data.yaml or data.zip (with data.yaml inside data.zip)

    Returns
        (zipped, data_dir, yaml_path) tuple:
            zipped: True if path was a .zip archive, else False
            data_dir: directory the archive was extracted to, or None if not zipped
            yaml_path: path of the first *.yaml found under data_dir, or path itself if not zipped

    Raises
        AssertionError: the archive could not be opened or extracted
        FileNotFoundError: no *.yaml file exists under the extracted dataset directory
    """
    # CONSTRAINT: path/to/abc.zip MUST unzip to 'path/to/abc/'
    if not str(path).endswith('.zip'):  # path is data.yaml
        return False, None, path

    # Function-scope import so this helper does not depend on the file's import block
    from zipfile import BadZipFile, ZipFile

    # Extract with the standard library rather than shelling out to `unzip`: this works
    # for paths containing spaces/shell metacharacters, needs no external binary, and
    # (unlike a call placed inside an `assert`) still runs under `python -O`.
    try:
        with ZipFile(path) as archive:
            archive.extractall(path=path.parent)
    except (BadZipFile, OSError) as e:
        # Same exception type and message as before, raised explicitly so -O cannot strip it
        raise AssertionError(f'Error unzipping {path}') from e

    data_dir = path.with_suffix('')  # dataset directory
    yaml_path = next(data_dir.rglob('*.yaml'), None)  # first match only, no full listing
    if yaml_path is None:
        raise FileNotFoundError(f'No *.yaml file found in {data_dir} after unzipping {path}')
    return True, data_dir, yaml_path  # zipped, data_dir, yaml_path
|
912 |
+
|
913 |
+
zipped, data_dir, yaml_path = unzip(Path(path))
|
914 |
+
with open(check_file(yaml_path)) as f:
|
915 |
data = yaml.safe_load(f) # data dict
|
916 |
+
if zipped:
|
917 |
+
data['path'] = data_dir # TODO: should this be dir.resolve()?
|
918 |
check_dataset(data, autodownload) # download dataset if missing
|
919 |
nc = data['nc'] # number of classes
|
920 |
stats = {'nc': nc, 'names': data['names']} # statistics dictionary
|