ESPnet
audio
self-supervised-learning
speech-recognition
William Chen committed on
Commit
c4a45e3
1 Parent(s): 5326bc1
Files changed (19) hide show
  1. .gitattributes +1 -0
  2. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/5epoch.pth +3 -0
  3. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/config.yaml +673 -0
  4. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/acc_m.png +0 -0
  5. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/acc_u.png +0 -0
  6. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/backward_time.png +0 -0
  7. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/correct_m.png +0 -0
  8. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/correct_u.png +0 -0
  9. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/count_m.png +0 -0
  10. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/count_u.png +0 -0
  11. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/forward_time.png +0 -0
  12. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/gpu_max_cached_mem_GB.png +0 -0
  13. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/iter_time.png +0 -0
  14. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/loss.png +0 -0
  15. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/optim0_lr0.png +0 -0
  16. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/optim_step_time.png +0 -0
  17. exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/train_time.png +0 -0
  18. exp_li/kmeans_iter2_hubert_train_li110_lid_portion0.1/km_500.mdl +3 -0
  19. meta.yaml +8 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ exp_li/kmeans_iter2_hubert_train_li110_lid_portion0.1/km_500.mdl filter=lfs diff=lfs merge=lfs -text
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/5epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:572ca9db821b4df2653f5c6d1d4d265723907fb82bbbb1f7dc6b9a09dae95110
3
+ size 1266621718
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/config.yaml ADDED
@@ -0,0 +1,673 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 4
10
+ num_att_plot: 0
11
+ dist_backend: nccl
12
+ dist_init_method: file:///scratch/bbjs/chen26/espnet_01_23/egs2/librispeech/ssl1/exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/.dist_init_9aa63297-5bd5-4a55-8431-9f73518677ea
13
+ dist_world_size: 24
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: slurm
19
+ multiprocessing_distributed: true
20
+ unused_parameters: true
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 10
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc_m
39
+ - max
40
+ keep_nbest_models: 5
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: true
50
+ drop_last: true
51
+ debug_grad: false
52
+ log_interval: null
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param:
65
+ - /scratch/bbjs/chen26/espnet_01_23/egs2/librispeech/ssl1/exp_li/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_raw_layer_9/valid.acc_m.ave_10best.pth
66
+ ignore_init_mismatch: false
67
+ freeze_param: []
68
+ num_iters_per_epoch: 2000
69
+ batch_size: 20
70
+ valid_batch_size: null
71
+ batch_bins: 30000000
72
+ valid_batch_bins: null
73
+ train_shape_file:
74
+ - exp_babel/hubert_iter2_stats_raw/train/speech_shape
75
+ - exp_babel/hubert_iter2_stats_raw/train/text_shape.word
76
+ valid_shape_file:
77
+ - exp_babel/hubert_iter2_stats_raw/valid/speech_shape
78
+ - exp_babel/hubert_iter2_stats_raw/valid/text_shape.word
79
+ batch_type: numel
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 80000
83
+ - 400
84
+ sort_in_batch: descending
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ train_data_path_and_name_and_type:
91
+ - - dump/raw/train_fleurs_babel/wav.scp
92
+ - speech
93
+ - kaldi_ark
94
+ - - dump/raw/train_fleurs_babel/text.km.kmeans_iter2_hubert_train_fleurs_babel_portion0.1
95
+ - text
96
+ - text
97
+ valid_data_path_and_name_and_type:
98
+ - - dump/raw/dev_all_li/wav.scp
99
+ - speech
100
+ - kaldi_ark
101
+ - - dump/raw/dev_all_li/text.km.kmeans_iter2_hubert_train_fleurs_babel_portion0.1
102
+ - text
103
+ - text
104
+ allow_variable_data_keys: false
105
+ max_cache_size: 0.0
106
+ max_cache_fd: 32
107
+ valid_max_cache_size: null
108
+ optim: adam
109
+ optim_conf:
110
+ lr: 0.0005
111
+ scheduler: warmuplr
112
+ scheduler_conf:
113
+ warmup_steps: 32000
114
+ token_list:
115
+ - '55'
116
+ - '477'
117
+ - '43'
118
+ - '405'
119
+ - '468'
120
+ - '486'
121
+ - '41'
122
+ - '34'
123
+ - '0'
124
+ - '415'
125
+ - '409'
126
+ - '367'
127
+ - '224'
128
+ - '436'
129
+ - '17'
130
+ - '462'
131
+ - '32'
132
+ - '481'
133
+ - '332'
134
+ - '291'
135
+ - '395'
136
+ - '336'
137
+ - '147'
138
+ - '364'
139
+ - '430'
140
+ - '454'
141
+ - '44'
142
+ - '196'
143
+ - '269'
144
+ - '230'
145
+ - '189'
146
+ - '274'
147
+ - '305'
148
+ - '419'
149
+ - '444'
150
+ - '352'
151
+ - '72'
152
+ - '158'
153
+ - '349'
154
+ - '212'
155
+ - '148'
156
+ - '13'
157
+ - '140'
158
+ - '164'
159
+ - '329'
160
+ - '288'
161
+ - '259'
162
+ - '9'
163
+ - '249'
164
+ - '30'
165
+ - '198'
166
+ - '299'
167
+ - '427'
168
+ - '301'
169
+ - '107'
170
+ - '344'
171
+ - '213'
172
+ - '36'
173
+ - '203'
174
+ - '120'
175
+ - '208'
176
+ - '285'
177
+ - '69'
178
+ - '181'
179
+ - '166'
180
+ - '130'
181
+ - '402'
182
+ - '194'
183
+ - '37'
184
+ - '106'
185
+ - '330'
186
+ - '407'
187
+ - '242'
188
+ - '412'
189
+ - '167'
190
+ - '133'
191
+ - '40'
192
+ - '211'
193
+ - '57'
194
+ - '389'
195
+ - '325'
196
+ - '197'
197
+ - '170'
198
+ - '190'
199
+ - '260'
200
+ - '286'
201
+ - '257'
202
+ - '374'
203
+ - '116'
204
+ - '38'
205
+ - '221'
206
+ - '81'
207
+ - '87'
208
+ - '4'
209
+ - '173'
210
+ - '94'
211
+ - '83'
212
+ - '200'
213
+ - '331'
214
+ - '143'
215
+ - '248'
216
+ - '22'
217
+ - '26'
218
+ - '388'
219
+ - '174'
220
+ - '488'
221
+ - '320'
222
+ - '397'
223
+ - '188'
224
+ - '401'
225
+ - '126'
226
+ - '28'
227
+ - '11'
228
+ - '304'
229
+ - '135'
230
+ - '33'
231
+ - '109'
232
+ - '202'
233
+ - '267'
234
+ - '86'
235
+ - '487'
236
+ - '490'
237
+ - '482'
238
+ - '426'
239
+ - '103'
240
+ - '151'
241
+ - '324'
242
+ - '492'
243
+ - '238'
244
+ - '483'
245
+ - '467'
246
+ - '1'
247
+ - '20'
248
+ - '268'
249
+ - '21'
250
+ - '47'
251
+ - '377'
252
+ - '351'
253
+ - '297'
254
+ - '398'
255
+ - '348'
256
+ - '157'
257
+ - '303'
258
+ - '100'
259
+ - '68'
260
+ - '254'
261
+ - '216'
262
+ - '177'
263
+ - '491'
264
+ - '171'
265
+ - '361'
266
+ - '24'
267
+ - '338'
268
+ - '129'
269
+ - '154'
270
+ - '192'
271
+ - '222'
272
+ - '8'
273
+ - '156'
274
+ - '7'
275
+ - '78'
276
+ - '64'
277
+ - '29'
278
+ - '146'
279
+ - '90'
280
+ - '263'
281
+ - '393'
282
+ - '95'
283
+ - '102'
284
+ - '433'
285
+ - '480'
286
+ - '225'
287
+ - '59'
288
+ - '66'
289
+ - '82'
290
+ - '85'
291
+ - '54'
292
+ - '310'
293
+ - '429'
294
+ - '176'
295
+ - '366'
296
+ - '42'
297
+ - '298'
298
+ - '144'
299
+ - '215'
300
+ - '318'
301
+ - '136'
302
+ - '122'
303
+ - '459'
304
+ - '205'
305
+ - '498'
306
+ - '112'
307
+ - '52'
308
+ - '396'
309
+ - '282'
310
+ - '428'
311
+ - '335'
312
+ - '339'
313
+ - '386'
314
+ - '289'
315
+ - '187'
316
+ - '333'
317
+ - '449'
318
+ - '458'
319
+ - '233'
320
+ - '35'
321
+ - '400'
322
+ - '223'
323
+ - '375'
324
+ - '70'
325
+ - '134'
326
+ - '127'
327
+ - '410'
328
+ - '71'
329
+ - '312'
330
+ - '73'
331
+ - '341'
332
+ - '326'
333
+ - '273'
334
+ - '472'
335
+ - '23'
336
+ - '113'
337
+ - '117'
338
+ - '387'
339
+ - '207'
340
+ - '342'
341
+ - '12'
342
+ - '49'
343
+ - '281'
344
+ - '65'
345
+ - '356'
346
+ - '99'
347
+ - '423'
348
+ - '141'
349
+ - '493'
350
+ - '61'
351
+ - '494'
352
+ - '277'
353
+ - '453'
354
+ - '362'
355
+ - '185'
356
+ - '460'
357
+ - '256'
358
+ - '159'
359
+ - '302'
360
+ - '88'
361
+ - '53'
362
+ - '76'
363
+ - '243'
364
+ - '235'
365
+ - '306'
366
+ - '278'
367
+ - '15'
368
+ - '56'
369
+ - '25'
370
+ - '115'
371
+ - '48'
372
+ - '264'
373
+ - '363'
374
+ - '110'
375
+ - '204'
376
+ - '414'
377
+ - '287'
378
+ - '184'
379
+ - '172'
380
+ - '383'
381
+ - '316'
382
+ - '424'
383
+ - '169'
384
+ - '358'
385
+ - '14'
386
+ - '206'
387
+ - '91'
388
+ - '245'
389
+ - '447'
390
+ - '60'
391
+ - '125'
392
+ - '283'
393
+ - '246'
394
+ - '255'
395
+ - '313'
396
+ - '97'
397
+ - '89'
398
+ - '321'
399
+ - '214'
400
+ - '314'
401
+ - '464'
402
+ - '27'
403
+ - '294'
404
+ - '497'
405
+ - '128'
406
+ - '451'
407
+ - '365'
408
+ - '478'
409
+ - '337'
410
+ - '226'
411
+ - '422'
412
+ - '471'
413
+ - '381'
414
+ - '63'
415
+ - '452'
416
+ - '290'
417
+ - '118'
418
+ - '51'
419
+ - '261'
420
+ - '432'
421
+ - '376'
422
+ - '31'
423
+ - '80'
424
+ - '142'
425
+ - '295'
426
+ - '275'
427
+ - '272'
428
+ - '123'
429
+ - '270'
430
+ - '236'
431
+ - '195'
432
+ - '469'
433
+ - '50'
434
+ - '218'
435
+ - '435'
436
+ - '479'
437
+ - '315'
438
+ - '182'
439
+ - '372'
440
+ - '446'
441
+ - '132'
442
+ - '327'
443
+ - '229'
444
+ - '217'
445
+ - '373'
446
+ - '340'
447
+ - '153'
448
+ - '2'
449
+ - '163'
450
+ - '199'
451
+ - '378'
452
+ - '101'
453
+ - '79'
454
+ - '96'
455
+ - '434'
456
+ - '489'
457
+ - '247'
458
+ - '440'
459
+ - '448'
460
+ - '139'
461
+ - '466'
462
+ - '150'
463
+ - '465'
464
+ - '62'
465
+ - '421'
466
+ - '252'
467
+ - '104'
468
+ - '180'
469
+ - '232'
470
+ - '108'
471
+ - '307'
472
+ - '219'
473
+ - '228'
474
+ - '322'
475
+ - '455'
476
+ - '370'
477
+ - '39'
478
+ - '280'
479
+ - '114'
480
+ - '240'
481
+ - '137'
482
+ - '179'
483
+ - '162'
484
+ - '406'
485
+ - '168'
486
+ - '368'
487
+ - '473'
488
+ - '75'
489
+ - '441'
490
+ - '266'
491
+ - '442'
492
+ - '119'
493
+ - '347'
494
+ - '92'
495
+ - '209'
496
+ - '470'
497
+ - '296'
498
+ - '476'
499
+ - '93'
500
+ - '191'
501
+ - '437'
502
+ - '293'
503
+ - '186'
504
+ - '111'
505
+ - '265'
506
+ - '183'
507
+ - '145'
508
+ - '394'
509
+ - '155'
510
+ - '420'
511
+ - '438'
512
+ - '5'
513
+ - '463'
514
+ - '431'
515
+ - '334'
516
+ - '138'
517
+ - '3'
518
+ - '369'
519
+ - '403'
520
+ - '84'
521
+ - '152'
522
+ - '392'
523
+ - '18'
524
+ - '231'
525
+ - '417'
526
+ - '160'
527
+ - '357'
528
+ - '323'
529
+ - '475'
530
+ - '131'
531
+ - '485'
532
+ - '350'
533
+ - '450'
534
+ - '439'
535
+ - '353'
536
+ - '443'
537
+ - '384'
538
+ - '16'
539
+ - '201'
540
+ - '346'
541
+ - '253'
542
+ - '404'
543
+ - '445'
544
+ - '250'
545
+ - '165'
546
+ - '98'
547
+ - '193'
548
+ - '300'
549
+ - '328'
550
+ - '234'
551
+ - '496'
552
+ - '67'
553
+ - '359'
554
+ - '46'
555
+ - '345'
556
+ - '317'
557
+ - '354'
558
+ - '385'
559
+ - '276'
560
+ - '309'
561
+ - '425'
562
+ - '311'
563
+ - '456'
564
+ - '220'
565
+ - '178'
566
+ - '124'
567
+ - '244'
568
+ - '416'
569
+ - '399'
570
+ - '161'
571
+ - '413'
572
+ - '308'
573
+ - '371'
574
+ - '258'
575
+ - '45'
576
+ - '360'
577
+ - '149'
578
+ - '284'
579
+ - '241'
580
+ - '319'
581
+ - '411'
582
+ - '461'
583
+ - '237'
584
+ - '408'
585
+ - '390'
586
+ - '227'
587
+ - '382'
588
+ - '10'
589
+ - '292'
590
+ - '355'
591
+ - '262'
592
+ - '418'
593
+ - '379'
594
+ - '6'
595
+ - '271'
596
+ - '380'
597
+ - '105'
598
+ - '251'
599
+ - '175'
600
+ - '239'
601
+ - '210'
602
+ - '74'
603
+ - '495'
604
+ - '279'
605
+ - '457'
606
+ - '343'
607
+ - '77'
608
+ - '19'
609
+ - '391'
610
+ - '121'
611
+ - '499'
612
+ - '474'
613
+ - '484'
614
+ - '58'
615
+ - <unk>
616
+ - <sos/eos>
617
+ init: null
618
+ collate_fn_conf:
619
+ label_downsampling: 1
620
+ pad: false
621
+ rand_crop: true
622
+ mix_speech: true
623
+ noise_apply_prob: 0.2
624
+ input_size: 1
625
+ num_classes: 500
626
+ use_preprocessor: true
627
+ use_mixing: true
628
+ cs_aug: false
629
+ mixing_splits: 16
630
+ token_type: word
631
+ bpemodel: null
632
+ non_linguistic_symbols: null
633
+ cleaner: null
634
+ g2p: null
635
+ speech_volume_normalize: null
636
+ rir_scp: null
637
+ rir_apply_prob: 1.0
638
+ noise_scp: data/noise/wav.scp
639
+ noise_apply_prob: 0.2
640
+ noise_db_range: '13_15'
641
+ pred_masked_weight: 1.0
642
+ pred_nomask_weight: 0.0
643
+ loss_weights: 0.0
644
+ frontend: null
645
+ frontend_conf: {}
646
+ specaug: null
647
+ specaug_conf: {}
648
+ normalize: null
649
+ normalize_conf: {}
650
+ preencoder: null
651
+ preencoder_conf: {}
652
+ encoder: torchaudio_hubert
653
+ encoder_conf:
654
+ encoder_projection_dropout: 0.0
655
+ encoder_attention_dropout: 0.0
656
+ encoder_ff_interm_dropout: 0.0
657
+ encoder_dropout: 0.0
658
+ encoder_layer_drop: 0.0
659
+ extractor_mode: layer_norm
660
+ encoder_embed_dim: 1024
661
+ encoder_num_layers: 24
662
+ encoder_num_heads: 16
663
+ encoder_ff_interm_features: 4096
664
+ encoder_layer_norm_first: true
665
+ final_dim: 768
666
+ feature_grad_mult: null
667
+ model: torchaudio
668
+ model_conf: {}
669
+ required:
670
+ - output_dir
671
+ - token_list
672
+ version: '202211'
673
+ distributed: true
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/acc_m.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/acc_u.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/backward_time.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/correct_m.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/correct_u.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/count_m.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/count_u.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/forward_time.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/gpu_max_cached_mem_GB.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/iter_time.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/loss.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/optim0_lr0.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/optim_step_time.png ADDED
exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/images/train_time.png ADDED
exp_li/kmeans_iter2_hubert_train_li110_lid_portion0.1/km_500.mdl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7497b3a32380a6e3c3e8f09ae2e007bd3bb8e9d10bc3ba8a225d3714a5b4cf8f
3
+ size 1538858
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202211'
2
+ files:
3
+ model_file: exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/5epoch.pth
4
+ python: "3.8.16 (default, Jan 17 2023, 23:13:24) \n[GCC 11.2.0]"
5
+ timestamp: 1696218193.437084
6
+ torch: 2.0.1+cu117
7
+ yaml_files:
8
+ train_config: exp_babel/hubert_iter2_train_ssl_torchaudiohubert_large_960h_pretrain_it2_wavlm_babel_light_raw_layer_9/config.yaml