# gujarati-tisv / config / config.yaml
# Irsh Vijayvargia
# Add application file
# 42a4544
# Top-level run settings.
# NOTE(review): presumably `false` selects evaluation/inference instead of
# training - confirm against the code that reads this flag.
training: false
# Compute device name. NOTE(review): 'mps' looks like the PyTorch Apple-Metal
# backend identifier - confirm against the consumer.
device: 'mps'
# Glob pattern for the raw (unprocessed) .wav files.
unprocessed_data: './DATA_DIR/*/*.wav'
---
# Dataset locations and audio feature-extraction settings.
# All keys below are nested under `data`; left at column 0 they would parse as
# top-level keys and `data` itself would be null.
data:
  train_path: './train_tisv'
  # NOTE(review): the two *_unprocessed globs point at ./TIMIT while the
  # top-level `unprocessed_data` points at ./DATA_DIR - confirm which one the
  # preprocessing step actually reads.
  train_path_unprocessed: './TIMIT/TRAIN/*/*/*.wav'
  test_path: './test_tisv'
  test_path_unprocessed: './TIMIT/TEST/*/*/*.wav'
  data_preprocessed: true
  sr: 16000  # NOTE(review): presumably the sampling rate in Hz - confirm
  nfft: 512  # For mel spectrogram preprocess
  window: 0.025  # Window length in seconds
  hop: 0.01  # Hop length in seconds
  nmels: 40  # Number of mel energies
  tisv_frame: 180  # Max number of time steps in input after preprocess
---
# Network architecture and checkpoint location.
# All keys below are nested under `model`; left at column 0 they would parse as
# top-level keys and `model` itself would be null.
model:
  hidden: 768  # Number of LSTM hidden layer units
  num_layer: 3  # Number of LSTM layers
  proj: 256  # Embedding size
  # Model path for testing, inference, or resuming training
  model_path: './speech_id_checkpoint/ckpt_epoch_840_batch_id_6.pth'
---
# Training-loop settings.
# All keys below are nested under `train`; left at column 0 they would parse as
# top-level keys and `train` itself would be null.
train:
  # 'N' is quoted because a bare N is a boolean literal under the YAML 1.1
  # bool type; quoting keeps the key a string on every parser.
  'N': 4  # Number of speakers in batch
  M: 6  # Number of utterances per speaker
  num_workers: 0  # Number of workers for dataloader
  lr: 0.01
  epochs: 1000  # Max training speaker epoch
  log_interval: 30  # Epochs before printing progress
  log_file: './speech_id_checkpoint/Stats'
  checkpoint_interval: 100  # Save model after x speaker epochs
  checkpoint_dir: './speech_id_checkpoint'
  restore: true  # Resume training from previous model path
---
# Evaluation settings.
# All keys below are nested under `test`; left at column 0 they would parse as
# top-level keys and `test` itself would be null.
test:
  # 'N' is quoted because a bare N is a boolean literal under the YAML 1.1
  # bool type; quoting keeps the key a string on every parser.
  'N': 4  # Number of speakers in batch
  M: 6  # Number of utterances per speaker
  num_workers: 8  # Number of workers for data loader
  epochs: 10  # Testing speaker epochs