mbarnig committed on
Commit
5041c73
1 Parent(s): e0d7a7c

Upload config_se.json

Browse files
Files changed (1) hide show
  1. config_se.json +121 -0
config_se.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "speaker_encoder",
3
+ "run_name": "speaker_encoder",
4
+ "run_description": "resnet speaker encoder trained with commonvoice all languages dev and train, Voxceleb 1 dev and Voxceleb 2 dev",
5
+ "epochs": 100000,
6
+ "batch_size": null,
7
+ "eval_batch_size": null,
8
+ "mixed_precision": false,
9
+ "run_eval": true,
10
+ "test_delay_epochs": 0,
11
+ "print_eval": false,
12
+ "print_step": 50,
13
+ "tb_plot_step": 100,
14
+ "tb_model_param_stats": false,
15
+ "save_step": 1000,
16
+ "checkpoint": true,
17
+ "keep_all_best": false,
18
+ "keep_after": 10000,
19
+ "num_loader_workers": 8,
20
+ "num_val_loader_workers": 0,
21
+ "use_noise_augment": false,
22
+ "output_path": "../checkpoints/speaker_encoder/language_balanced/normalized/angleproto-4-samples-by-speakers/",
23
+ "distributed_backend": "nccl",
24
+ "distributed_url": "tcp://localhost:54321",
25
+ "audio": {
26
+ "fft_size": 512,
27
+ "win_length": 400,
28
+ "hop_length": 160,
29
+ "frame_shift_ms": null,
30
+ "frame_length_ms": null,
31
+ "stft_pad_mode": "reflect",
32
+ "sample_rate": 16000,
33
+ "resample": false,
34
+ "preemphasis": 0.97,
35
+ "ref_level_db": 20,
36
+ "do_sound_norm": false,
37
+ "do_trim_silence": false,
38
+ "trim_db": 60,
39
+ "power": 1.5,
40
+ "griffin_lim_iters": 60,
41
+ "num_mels": 64,
42
+ "mel_fmin": 0.0,
43
+ "mel_fmax": 8000.0,
44
+ "spec_gain": 20,
45
+ "signal_norm": false,
46
+ "min_level_db": -100,
47
+ "symmetric_norm": false,
48
+ "max_norm": 4.0,
49
+ "clip_norm": false,
50
+ "stats_path": null,
51
+ "do_rms_norm": true,
52
+ "db_level": -27.0
53
+ },
54
+ "datasets": [
55
+ {
56
+ "name": "voxceleb2",
57
+ "path": "/workspace/scratch/ecasanova/datasets/VoxCeleb/vox2_dev_aac/",
58
+ "meta_file_train": null,
59
+ "ununsed_speakers": null,
60
+ "meta_file_val": null,
61
+ "meta_file_attn_mask": "",
62
+ "language": "voxceleb"
63
+ }
64
+ ],
65
+ "model_params": {
66
+ "model_name": "resnet",
67
+ "input_dim": 64,
68
+ "use_torch_spec": true,
69
+ "log_input": true,
70
+ "proj_dim": 512
71
+ },
72
+ "audio_augmentation": {
73
+ "p": 0.5,
74
+ "rir": {
75
+ "rir_path": "/workspace/store/ecasanova/ComParE/RIRS_NOISES/simulated_rirs/",
76
+ "conv_mode": "full"
77
+ },
78
+ "additive": {
79
+ "sounds_path": "/workspace/store/ecasanova/ComParE/musan/",
80
+ "speech": {
81
+ "min_snr_in_db": 13,
82
+ "max_snr_in_db": 20,
83
+ "min_num_noises": 1,
84
+ "max_num_noises": 1
85
+ },
86
+ "noise": {
87
+ "min_snr_in_db": 0,
88
+ "max_snr_in_db": 15,
89
+ "min_num_noises": 1,
90
+ "max_num_noises": 1
91
+ },
92
+ "music": {
93
+ "min_snr_in_db": 5,
94
+ "max_snr_in_db": 15,
95
+ "min_num_noises": 1,
96
+ "max_num_noises": 1
97
+ }
98
+ },
99
+ "gaussian": {
100
+ "p": 0.0,
101
+ "min_amplitude": 0.0,
102
+ "max_amplitude": 1e-05
103
+ }
104
+ },
105
+ "storage": {
106
+ "sample_from_storage_p": 0.5,
107
+ "storage_size": 40
108
+ },
109
+ "max_train_step": 1000000,
110
+ "loss": "angleproto",
111
+ "grad_clip": 3.0,
112
+ "lr": 0.0001,
113
+ "lr_decay": false,
114
+ "warmup_steps": 4000,
115
+ "wd": 1e-06,
116
+ "steps_plot_stats": 100,
117
+ "num_speakers_in_batch": 100,
118
+ "num_utters_per_speaker": 4,
119
+ "skip_speakers": true,
120
+ "voice_len": 2.0
121
+ }