Spaces:
Sleeping
Sleeping
File size: 2,595 Bytes
1eabf9f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os
import json
import sys
import librosa
def traverse_dir(
        root_dir,
        extension,
        amount=None,
        str_include=None,
        str_exclude=None,
        is_pure=False,
        is_sort=False,
        is_ext=True):
    """Recursively collect files under ``root_dir`` whose names end with ``extension``.

    Args:
        root_dir: Directory to walk with ``os.walk``. A trailing path
            separator is tolerated.
        extension: Suffix a file name must end with. This is a plain
            ``str.endswith`` match, so both ``'wav'`` and ``'.wav'`` work.
        amount: If not None, stop collecting once this many paths have been
            gathered. The cap is applied in walk order; sorting (if any)
            happens afterwards on the truncated list.
        str_include: If not None, keep only paths containing this substring.
        str_exclude: If not None, drop paths containing this substring.
        is_pure: If True, return paths relative to ``root_dir``; otherwise
            return ``root_dir``-joined paths as produced by the walk.
        is_sort: If True, sort the result list before returning it.
        is_ext: If False, strip the trailing ``.<ext>`` from each path.

    Returns:
        A list of path strings. The substring filters are evaluated against
        the same form of the path that is returned (relative when
        ``is_pure`` is True, joined otherwise), before any extension strip.
    """
    file_list = []
    for root, _, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(extension):
                continue

            # Stop as soon as the requested number of paths is collected.
            if amount is not None and len(file_list) == amount:
                if is_sort:
                    file_list.sort()
                return file_list

            mix_path = os.path.join(root, file)
            # relpath (rather than slicing off len(root_dir) + 1 characters)
            # stays correct when root_dir carries a trailing separator.
            if is_pure:
                pure_path = os.path.relpath(mix_path, root_dir)
            else:
                pure_path = mix_path

            if str_include is not None and str_include not in pure_path:
                continue
            if str_exclude is not None and str_exclude in pure_path:
                continue

            if not is_ext:
                # Drop everything from the last '.' onwards; a path without
                # any dot has no extension to strip and is kept unchanged.
                stem, dot, _ = pure_path.rpartition('.')
                if dot:
                    pure_path = stem

            file_list.append(pure_path)

    if is_sort:
        file_list.sort()
    return file_list
if __name__ == '__main__':
    # Build a JSONL manifest with one JSON object per 'no_vocal' wav clip
    # found under root_dir, using the metadata stored in the tags.json file
    # that sits next to each clip.
    root_dir = '../audiocraft/dataset/example/clip'
    path_jsonl = '../audiocraft/egs/example/data.jsonl'

    filelist = traverse_dir(
        root_dir,
        extension='wav',
        str_include='no_vocal',
        is_sort=True)
    num_files = len(filelist)

    with open(path_jsonl, "w", encoding="utf-8") as train_file:
        for fidx, path_wave in enumerate(filelist):
            print(f'==={fidx}/{num_files}================')
            path_json = os.path.join(
                os.path.dirname(path_wave), 'tags.json')
            sr = librosa.get_samplerate(path_wave)
            print('path_wave:', path_wave)
            print('path_json:', path_json)

            with open(path_json, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Explicit check instead of `assert`: asserts are removed under
            # `python -O`, which would let inconsistent entries through.
            if sr != data['sample_rate']:
                raise ValueError(
                    f"sample rate mismatch for {path_wave}: audio file "
                    f"reports {sr}, {path_json} says {data['sample_rate']}")

            # NOTE(review): 'path' is taken from tags.json rather than from
            # path_wave; confirm the two are expected to agree.
            final = {
                'path': data['path'],
                'duration': data['duration'],
                "sample_rate": data['sample_rate'],
                "bpm": data['bpm'],
                "amplitude": None,
                "weight": None,
                "info_path": None
            }
            train_file.write(json.dumps(final) + '\n')

    print('\n\n\n==================')
    print('num files:', num_files)
|