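# Preprocessing script (purpose inferred from the code below): prepares a
# three-speaker emotional Kazakh dataset for TTS training. It writes
# train/eval utterance lists, extracts mel-spectrograms into a Kaldi-style
# feats.ark/feats.scp pair via kaldiio, builds utt2spk/utt2emo JSON maps,
# and normalizes the transcripts.
# Assumed layout: <dataset>/<speaker_id>/{train,eval}/<uttid>.wav and .txt,
# with <uttid> of the form "<id>_<emotion>_...".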
import argparse
import glob
import json
import os
import re

import kaldiio
import torchaudio
from num2words import num2words
from tqdm import tqdm

from melspec import mel_spectrogram
from text import _clean_text, symbols
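# 'text' and 'melspec' are local modules from this repo: 'text' provides the
# Kazakh cleaners and the model's symbol set, and 'melspec' provides a
# HiFi-GAN-style mel_spectrogram(y, n_fft, num_mels, sampling_rate, hop_size,
# win_size, fmin, fmax, center) helper (signature inferred from the call below).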
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--data', type=str, required=True,
                        help='path to the emotional dataset')
    args = parser.parse_args()

    # The glob patterns below concatenate paths directly, so make sure the
    # dataset path ends with a separator.
    dataset_path = os.path.join(args.data, '')

    filelists_path = 'filelists/all_spks/'
    feats_scp_file = filelists_path + 'feats.scp'
    feats_ark_file = filelists_path + 'feats.ark'

    spks = ['1263201035', '805570882', '399172782']
    train_files = []
    eval_files = []
    for spk in spks:
        train_files += glob.glob(dataset_path + spk + '/train/*.wav')
        eval_files += glob.glob(dataset_path + spk + '/eval/*.wav')

    os.makedirs(filelists_path, exist_ok=True)
    with open(filelists_path + 'train_utts.txt', 'w', encoding='utf-8') as f:
        for wav_path in train_files:
            wav_name = os.path.splitext(os.path.basename(wav_path))[0]
            f.write(wav_name + '\n')
    with open(filelists_path + 'eval_utts.txt', 'w', encoding='utf-8') as f:
        for wav_path in eval_files:
            wav_name = os.path.splitext(os.path.basename(wav_path))[0]
            f.write(wav_name + '\n')
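    # Both *_utts.txt files hold one utterance id per line (the wav basename
    # without its extension).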
    # WriteHelper writes both feats.ark and feats.scp itself, so the scp
    # file must not be opened separately.
    with kaldiio.WriteHelper(f'ark,scp:{feats_ark_file},{feats_scp_file}') as writer:
        for root, dirs, files in os.walk(dataset_path):
            for file in tqdm(files):
                if not file.endswith('.wav'):
                    continue
                wav_path = os.path.join(root, file)
                wav_name = os.path.splitext(os.path.basename(wav_path))[0]
                signal, rate = torchaudio.load(wav_path)
                # n_fft=1024, num_mels=80, sampling_rate=22050,
                # hop_size=256, win_size=1024, fmin=0, fmax=8000
                spec = mel_spectrogram(signal, 1024, 80, 22050, 256,
                                       1024, 0, 8000, center=False).squeeze()
                # kaldiio expects numpy arrays, not torch tensors
                writer[wav_name] = spec.numpy()
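    # Optional sanity check: the pair written above can be read back lazily
    # with kaldiio, e.g.
    #   feats = kaldiio.load_scp(feats_scp_file)
    #   assert feats[next(iter(feats))].shape[0] == 80  # 80 mel bins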
    # Emotion labels come from the second "_"-separated field of each file
    # name, e.g. "<id>_<emotion>_....wav".
    emotions = [os.path.basename(x).split('_')[1]
                for x in glob.glob(dataset_path + '**/**/*')]
    emotions = sorted(set(emotions))

    utt2spk = {}
    utt2emo = {}
    wavs = glob.glob(dataset_path + '**/**/*.wav')
    for wav_path in tqdm(wavs):
        wav_name = os.path.splitext(os.path.basename(wav_path))[0]
        emotion = emotions.index(wav_name.split('_')[1])
        # speaker labels must start at 0
        if wav_path.split('/')[-3] == '1263201035':
            spk = 0
        elif wav_path.split('/')[-3] == '805570882':
            spk = 1
        else:
            spk = 2
        utt2spk[wav_name] = str(spk)
        utt2emo[wav_name] = str(emotion)

    utt2spk = dict(sorted(utt2spk.items()))
    utt2emo = dict(sorted(utt2emo.items()))
    with open(filelists_path + 'utt2emo.json', 'w') as fp:
        json.dump(utt2emo, fp, indent=4)
    with open(filelists_path + 'utt2spk.json', 'w') as fp:
        json.dump(utt2spk, fp, indent=4)
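    # Both JSON maps are keyed by utterance id with string-valued labels,
    # e.g. utt2spk.json holds {"<uttid>": "0", ...}; downstream loaders are
    # assumed to cast these back to integers.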
    txt_files = sorted(glob.glob(dataset_path + '**/**/*.txt'))
    txt = []
    basenames = []
    for txt_path in txt_files:
        basename = os.path.basename(txt_path).replace('.txt', '')
        with open(txt_path, 'r', encoding='utf-8') as f:
            txt.append(_clean_text(f.read().strip('\n'),
                                   cleaner_names=['kazakh_cleaners']).replace("'", ''))
        basenames.append(basename)

    # Spell out digit sequences as Kazakh words via num2words.
    output_string = [re.sub(r'(\d+)', lambda m: num2words(m.group(), lang='kz'), sentence)
                     for sentence in txt]
    # Drop any character that is not in the model's symbol set.
    cleaned_txt = []
    for t in output_string:
        cleaned_txt.append(''.join([s for s in t if s in symbols]))

    utt2text = {basenames[i]: cleaned_txt[i] for i in range(len(cleaned_txt))}
    utt2text = dict(sorted(utt2text.items()))
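    # Keys are sorted so that the 'text' file below comes out in the
    # sorted-utterance-id order that Kaldi-style tooling expects.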
    # 'vocab' collects every character that survives cleaning; useful for
    # checking coverage against the symbol set.
    vocab = set()
    with open(filelists_path + 'text', 'w', encoding='utf-8') as f:
        for utt, text in utt2text.items():
            vocab.update(text)
            f.write(utt + ' ' + text + '\n')
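    # Example invocation (the script name is illustrative):
    #   python preprocess.py -d /path/to/emotional_dataset/
    # Outputs land in filelists/all_spks/: train_utts.txt, eval_utts.txt,
    # feats.ark, feats.scp, utt2spk.json, utt2emo.json, and text.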