Spaces:
Sleeping
Sleeping
Hugo Flores Garcia
committed on
Commit
•
cf172ac
1
Parent(s):
09b9691
update splits, reqs
Browse files
- scripts/utils/split.py +17 -4
- setup.py +2 -1
scripts/utils/split.py
CHANGED
@@ -1,8 +1,12 @@
|
|
1 |
from pathlib import Path
|
2 |
import random
|
3 |
import shutil
|
|
|
|
|
4 |
|
5 |
import argbind
|
|
|
|
|
6 |
|
7 |
from audiotools.core import util
|
8 |
|
@@ -12,8 +16,13 @@ def train_test_split(
|
|
12 |
audio_folder: str = ".",
|
13 |
test_size: float = 0.2,
|
14 |
seed: int = 42,
|
|
|
15 |
):
|
16 |
-
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# split according to test_size
|
19 |
n_test = int(len(audio_files) * test_size)
|
@@ -37,10 +46,14 @@ def train_test_split(
|
|
37 |
for split, files in (
|
38 |
("train", train_files), ("test", test_files)
|
39 |
):
|
40 |
-
for file in files:
|
41 |
-
out_file =
|
42 |
out_file.parent.mkdir(exist_ok=True, parents=True)
|
43 |
-
|
|
|
|
|
|
|
|
|
44 |
|
45 |
|
46 |
|
|
|
1 |
from pathlib import Path
|
2 |
import random
|
3 |
import shutil
|
4 |
+
import os
|
5 |
+
import json
|
6 |
|
7 |
import argbind
|
8 |
+
from tqdm import tqdm
|
9 |
+
from tqdm.contrib.concurrent import thread_map
|
10 |
|
11 |
from audiotools.core import util
|
12 |
|
|
|
16 |
audio_folder: str = ".",
|
17 |
test_size: float = 0.2,
|
18 |
seed: int = 42,
|
19 |
+
pattern: str = "**/*.mp3",
|
20 |
):
|
21 |
+
print(f"finding audio")
|
22 |
+
|
23 |
+
audio_folder = Path(audio_folder)
|
24 |
+
audio_files = list(tqdm(audio_folder.glob(pattern)))
|
25 |
+
print(f"found {len(audio_files)} audio files")
|
26 |
|
27 |
# split according to test_size
|
28 |
n_test = int(len(audio_files) * test_size)
|
|
|
46 |
for split, files in (
|
47 |
("train", train_files), ("test", test_files)
|
48 |
):
|
49 |
+
for file in tqdm(files):
|
50 |
+
out_file = audio_folder.parent / f"{audio_folder.name}-{split}" / Path(file).name
|
51 |
out_file.parent.mkdir(exist_ok=True, parents=True)
|
52 |
+
os.symlink(file, out_file)
|
53 |
+
|
54 |
+
# save split as json
|
55 |
+
with open(Path(audio_folder) / f"{split}.json", "w") as f:
|
56 |
+
json.dump([str(f) for f in files], f)
|
57 |
|
58 |
|
59 |
|
setup.py
CHANGED
@@ -39,6 +39,7 @@ setup(
|
|
39 |
"google-cloud-logging==2.2.0",
|
40 |
"einops",
|
41 |
# "frechet_audio_distance",
|
42 |
-
"gradio"
|
|
|
43 |
],
|
44 |
)
|
|
|
39 |
"google-cloud-logging==2.2.0",
|
40 |
"einops",
|
41 |
# "frechet_audio_distance",
|
42 |
+
"gradio",
|
43 |
+
"tensorboardX",
|
44 |
],
|
45 |
)
|