glenn-jocher
committed on
Commit
•
4798e66
1
Parent(s):
0a3ff71
Autosplit (#1488)
Browse files- utils/datasets.py +17 -0
utils/datasets.py
CHANGED
@@ -902,3 +902,20 @@ def flatten_recursive(path='../coco128'):
|
|
902 |
create_folder(new_path)
|
903 |
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
|
904 |
shutil.copyfile(file, new_path / Path(file).name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
902 |
create_folder(new_path)
|
903 |
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
|
904 |
shutil.copyfile(file, new_path / Path(file).name)
|
905 |
+
|
906 |
+
|
907 |
+
def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0)):  # from utils.datasets import *; autosplit()
    """ Autosplit a dataset into train/val/test splits and save *.txt files

    Every image file found recursively under `path` is randomly assigned to one
    of three splits, and its path is written (one per line) to
    autosplit_train.txt, autosplit_val.txt or autosplit_test.txt inside `path`.
    Any of these files left over from a previous run is removed first; a split
    that receives no images gets no file.

    # Arguments
        path:       Path to images directory
        weights:    Train, val, test weights (sequence of 3 relative weights)
    """
    path = Path(path)  # images dir

    # Keep image files only, matching the extension case-insensitively so that
    # e.g. 'IMG.JPG' is not silently dropped.
    # NOTE(review): assumes img_formats holds lower-case extensions without the dot - confirm.
    files = [f for f in path.rglob('*.*') if f.suffix[1:].lower() in img_formats]
    n = len(files)

    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files

    for x in txt:  # remove existing
        if (path / x).exists():
            (path / x).unlink()

    # Group the output lines per split so each txt file is opened once instead of once per image
    lines = [[], [], []]
    for i, img in tqdm(zip(indices, files), total=n):  # zip() has no len(), so pass total explicitly
        lines[i].append(str(img) + '\n')  # add image to its split

    for name, split in zip(txt, lines):
        if split:  # only create a file for splits that actually received images
            with open(path / name, 'a') as f:
                f.writelines(split)
|