# Extracted from a web file viewer. The viewer header reported
# "File size: 1,744 Bytes" and the marker "cac4808" (presumably a short
# revision hash); its line-number gutter has been dropped.
# Model entry: `_target_` holds a dotted class path and the remaining keys are
# audio front-end settings grouped under the same mapping.
# NOTE(review): presumably the consumer instantiates `_target_` with the
# sibling keys as keyword arguments — confirm against the loader.
model:
  _target_: pyannote.audio.models.embedding.WeSpeakerResNet293
  sample_rate: 16000  # presumably Hz — TODO confirm
  num_channels: 1
  # The four values below equal the ones listed in the fbank_args stanza
  # further down in this file.
  num_mel_bins: 80
  frame_length: 25  # presumably milliseconds — TODO confirm
  frame_shift: 10  # presumably milliseconds — TODO confirm
  dither: 1.0
  window_type: hamming
  use_energy: false
# Architecture arguments.
# NOTE(review): nesting level reconstructed — assumed to be a top-level
# sibling of `model`, not a child of it; confirm.
model_args:
  embed_dim: 256  # the projection_args stanza lists the same embed_dim
  feat_dim: 80  # same number as num_mel_bins in this file
  pooling_func: TSTP
  two_emb_layer: false
# Training data format. `shard` is consistent with train_data in this file,
# which points at a shard.list path.
data_type: shard

# Batch loading options.
# NOTE(review): the key names coincide with torch.utils.data.DataLoader
# arguments; presumably forwarded there — confirm.
dataloader_args:
  batch_size: 32
  drop_last: true
  num_workers: 16
  pin_memory: false
  prefetch_factor: 8
# Per-sample data pipeline options, with three nested option groups
# (fbank_args, shuffle_args, spec_aug_args).
dataset_args:
  aug_prob: 0.6  # same number as the "aug0.6" token in exp_dir
  # Filterbank settings; these values equal the ones listed under `model`.
  fbank_args:
    dither: 1.0
    frame_length: 25
    frame_shift: 10
    num_mel_bins: 80
  num_frms: 200  # same number as the "num_frms200" token in exp_dir
  shuffle: true
  shuffle_args:
    shuffle_size: 2500
  # spec_aug is switched off here; its argument group is still listed.
  spec_aug: false
  spec_aug_args:
    max_f: 8
    max_t: 10
    num_f_mask: 1
    num_t_mask: 1
    prob: 0.6
  speed_perturb: true
# Experiment directory. Its name spells out settings that also appear as
# values elsewhere in this file (TSTP, emb256, fbank80, num_frms200, aug0.6,
# SGD, epoch150).
exp_dir: exp/ResNet293-TSTP-emb256-fbank80-num_frms200-aug0.6-spTrue-saFalse-ArcMargin-SGD-epoch150

# Two entries, 0 and 1 (presumably GPU device indices — confirm).
gpus:
  - 0
  - 1

log_batch_interval: 100

# Loss name plus an explicitly empty argument mapping.
loss: CrossEntropyLoss
loss_args: {}
# Margin scheduler name and its settings.
# NOTE(review): the key names suggest the margin moves from initial_margin to
# final_margin between increase_start_epoch and fix_start_epoch — confirm
# against the MarginScheduler implementation.
margin_scheduler: MarginScheduler
margin_update:
  epoch_iter: 17062  # the scheduler_args stanza lists the same epoch_iter
  final_margin: 0.2
  fix_start_epoch: 40
  increase_start_epoch: 20
  increase_type: exp
  initial_margin: 0.0
  update_margin: true
# Explicit null: no initial model is specified.
model_init: null
noise_data: data/musan/lmdb
num_avg: 2
num_epochs: 150  # scheduler_args repeats this value

# Optimizer name and its arguments.
optimizer: SGD
optimizer_args:
  lr: 0.1  # same value as initial_lr in scheduler_args
  momentum: 0.9
  nesterov: true
  weight_decay: 0.0001
# Projection (classification head) settings.
projection_args:
  easy_margin: false
  embed_dim: 256  # the model_args stanza lists the same embed_dim
  num_class: 17982
  project_type: arc_margin
  scale: 32.0

reverb_data: data/rirs/lmdb
save_epoch_interval: 5
# Learning-rate scheduler name and its settings.
scheduler: ExponentialDecrease
scheduler_args:
  epoch_iter: 17062  # the margin_update stanza lists the same epoch_iter
  final_lr: 5.0e-05
  initial_lr: 0.1  # same value as lr in optimizer_args
  num_epochs: 150  # repeats the top-level num_epochs
  scale_ratio: 1.0
  warm_from_zero: true
  warm_up_epoch: 6
seed: 42

# Training inputs. The shard.list path is consistent with `data_type: shard`
# in this file.
train_data: data/vox2_dev/shard.list
train_label: data/vox2_dev/utt2spk