pravin96 committed on
Commit
ccbdd15
1 Parent(s): 5620633

Training in progress, step 250

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "distil-whisper/distil-medium.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -13,17 +13,17 @@
13
  ],
14
  "bos_token_id": 50257,
15
  "classifier_proj_size": 256,
16
- "d_model": 1024,
17
- "decoder_attention_heads": 16,
18
- "decoder_ffn_dim": 4096,
19
  "decoder_layerdrop": 0.0,
20
- "decoder_layers": 2,
21
  "decoder_start_token_id": 50257,
22
  "dropout": 0.0,
23
- "encoder_attention_heads": 16,
24
- "encoder_ffn_dim": 4096,
25
  "encoder_layerdrop": 0.0,
26
- "encoder_layers": 24,
27
  "eos_token_id": 50256,
28
  "forced_decoder_ids": [
29
  [
@@ -44,7 +44,7 @@
44
  "max_target_positions": 448,
45
  "median_filter_width": 7,
46
  "model_type": "whisper",
47
- "num_hidden_layers": 24,
48
  "num_mel_bins": 80,
49
  "pad_token_id": 50256,
50
  "scale_embedding": false,
 
1
  {
2
+ "_name_or_path": "distil-whisper/distil-small.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
13
  ],
14
  "bos_token_id": 50257,
15
  "classifier_proj_size": 256,
16
+ "d_model": 768,
17
+ "decoder_attention_heads": 12,
18
+ "decoder_ffn_dim": 3072,
19
  "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 4,
21
  "decoder_start_token_id": 50257,
22
  "dropout": 0.0,
23
+ "encoder_attention_heads": 12,
24
+ "encoder_ffn_dim": 3072,
25
  "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 12,
27
  "eos_token_id": 50256,
28
  "forced_decoder_ids": [
29
  [
 
44
  "max_target_positions": 448,
45
  "median_filter_width": 7,
46
  "model_type": "whisper",
47
+ "num_hidden_layers": 12,
48
  "num_mel_bins": 80,
49
  "pad_token_id": 50256,
50
  "scale_embedding": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58c253dc82b7de7615affd9e23cf35c731fe9f59f4513c8180c2418bdbbd1ff0
3
- size 1577549616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33de7ac2c2faa470d2f84715c0c836954334fd12713a26d6ffe1de3d0af031a2
3
+ size 664561848
runs/Oct24_20-21-51_0b6bd6f5a84e/events.out.tfevents.1729801364.0b6bd6f5a84e.1229.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2248cb0f61998006096fdd3b35fea91feab233e37d4001859f928f602ee9426f
3
+ size 9106
tokenizer_config.json CHANGED
@@ -12,849 +12,849 @@
12
  },
13
  "50257": {
14
  "content": "<|startoftranscript|>",
15
- "lstrip": false,
16
  "normalized": false,
17
- "rstrip": false,
18
  "single_word": false,
19
  "special": true
20
  },
21
  "50258": {
22
  "content": "<|en|>",
23
- "lstrip": false,
24
  "normalized": false,
25
- "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
  },
29
  "50259": {
30
  "content": "<|zh|>",
31
- "lstrip": false,
32
  "normalized": false,
33
- "rstrip": false,
34
  "single_word": false,
35
  "special": true
36
  },
37
  "50260": {
38
  "content": "<|de|>",
39
- "lstrip": false,
40
  "normalized": false,
41
- "rstrip": false,
42
  "single_word": false,
43
  "special": true
44
  },
45
  "50261": {
46
  "content": "<|es|>",
47
- "lstrip": false,
48
  "normalized": false,
49
- "rstrip": false,
50
  "single_word": false,
51
  "special": true
52
  },
53
  "50262": {
54
  "content": "<|ru|>",
55
- "lstrip": false,
56
  "normalized": false,
57
- "rstrip": false,
58
  "single_word": false,
59
  "special": true
60
  },
61
  "50263": {
62
  "content": "<|ko|>",
63
- "lstrip": false,
64
  "normalized": false,
65
- "rstrip": false,
66
  "single_word": false,
67
  "special": true
68
  },
69
  "50264": {
70
  "content": "<|fr|>",
71
- "lstrip": false,
72
  "normalized": false,
73
- "rstrip": false,
74
  "single_word": false,
75
  "special": true
76
  },
77
  "50265": {
78
  "content": "<|ja|>",
79
- "lstrip": false,
80
  "normalized": false,
81
- "rstrip": false,
82
  "single_word": false,
83
  "special": true
84
  },
85
  "50266": {
86
  "content": "<|pt|>",
87
- "lstrip": false,
88
  "normalized": false,
89
- "rstrip": false,
90
  "single_word": false,
91
  "special": true
92
  },
93
  "50267": {
94
  "content": "<|tr|>",
95
- "lstrip": false,
96
  "normalized": false,
97
- "rstrip": false,
98
  "single_word": false,
99
  "special": true
100
  },
101
  "50268": {
102
  "content": "<|pl|>",
103
- "lstrip": false,
104
  "normalized": false,
105
- "rstrip": false,
106
  "single_word": false,
107
  "special": true
108
  },
109
  "50269": {
110
  "content": "<|ca|>",
111
- "lstrip": false,
112
  "normalized": false,
113
- "rstrip": false,
114
  "single_word": false,
115
  "special": true
116
  },
117
  "50270": {
118
  "content": "<|nl|>",
119
- "lstrip": false,
120
  "normalized": false,
121
- "rstrip": false,
122
  "single_word": false,
123
  "special": true
124
  },
125
  "50271": {
126
  "content": "<|ar|>",
127
- "lstrip": false,
128
  "normalized": false,
129
- "rstrip": false,
130
  "single_word": false,
131
  "special": true
132
  },
133
  "50272": {
134
  "content": "<|sv|>",
135
- "lstrip": false,
136
  "normalized": false,
137
- "rstrip": false,
138
  "single_word": false,
139
  "special": true
140
  },
141
  "50273": {
142
  "content": "<|it|>",
143
- "lstrip": false,
144
  "normalized": false,
145
- "rstrip": false,
146
  "single_word": false,
147
  "special": true
148
  },
149
  "50274": {
150
  "content": "<|id|>",
151
- "lstrip": false,
152
  "normalized": false,
153
- "rstrip": false,
154
  "single_word": false,
155
  "special": true
156
  },
157
  "50275": {
158
  "content": "<|hi|>",
159
- "lstrip": false,
160
  "normalized": false,
161
- "rstrip": false,
162
  "single_word": false,
163
  "special": true
164
  },
165
  "50276": {
166
  "content": "<|fi|>",
167
- "lstrip": false,
168
  "normalized": false,
169
- "rstrip": false,
170
  "single_word": false,
171
  "special": true
172
  },
173
  "50277": {
174
  "content": "<|vi|>",
175
- "lstrip": false,
176
  "normalized": false,
177
- "rstrip": false,
178
  "single_word": false,
179
  "special": true
180
  },
181
  "50278": {
182
  "content": "<|iw|>",
183
- "lstrip": false,
184
  "normalized": false,
185
- "rstrip": false,
186
  "single_word": false,
187
  "special": true
188
  },
189
  "50279": {
190
  "content": "<|uk|>",
191
- "lstrip": false,
192
  "normalized": false,
193
- "rstrip": false,
194
  "single_word": false,
195
  "special": true
196
  },
197
  "50280": {
198
  "content": "<|el|>",
199
- "lstrip": false,
200
  "normalized": false,
201
- "rstrip": false,
202
  "single_word": false,
203
  "special": true
204
  },
205
  "50281": {
206
  "content": "<|ms|>",
207
- "lstrip": false,
208
  "normalized": false,
209
- "rstrip": false,
210
  "single_word": false,
211
  "special": true
212
  },
213
  "50282": {
214
  "content": "<|cs|>",
215
- "lstrip": false,
216
  "normalized": false,
217
- "rstrip": false,
218
  "single_word": false,
219
  "special": true
220
  },
221
  "50283": {
222
  "content": "<|ro|>",
223
- "lstrip": false,
224
  "normalized": false,
225
- "rstrip": false,
226
  "single_word": false,
227
  "special": true
228
  },
229
  "50284": {
230
  "content": "<|da|>",
231
- "lstrip": false,
232
  "normalized": false,
233
- "rstrip": false,
234
  "single_word": false,
235
  "special": true
236
  },
237
  "50285": {
238
  "content": "<|hu|>",
239
- "lstrip": false,
240
  "normalized": false,
241
- "rstrip": false,
242
  "single_word": false,
243
  "special": true
244
  },
245
  "50286": {
246
  "content": "<|ta|>",
247
- "lstrip": false,
248
  "normalized": false,
249
- "rstrip": false,
250
  "single_word": false,
251
  "special": true
252
  },
253
  "50287": {
254
  "content": "<|no|>",
255
- "lstrip": false,
256
  "normalized": false,
257
- "rstrip": false,
258
  "single_word": false,
259
  "special": true
260
  },
261
  "50288": {
262
  "content": "<|th|>",
263
- "lstrip": false,
264
  "normalized": false,
265
- "rstrip": false,
266
  "single_word": false,
267
  "special": true
268
  },
269
  "50289": {
270
  "content": "<|ur|>",
271
- "lstrip": false,
272
  "normalized": false,
273
- "rstrip": false,
274
  "single_word": false,
275
  "special": true
276
  },
277
  "50290": {
278
  "content": "<|hr|>",
279
- "lstrip": false,
280
  "normalized": false,
281
- "rstrip": false,
282
  "single_word": false,
283
  "special": true
284
  },
285
  "50291": {
286
  "content": "<|bg|>",
287
- "lstrip": false,
288
  "normalized": false,
289
- "rstrip": false,
290
  "single_word": false,
291
  "special": true
292
  },
293
  "50292": {
294
  "content": "<|lt|>",
295
- "lstrip": false,
296
  "normalized": false,
297
- "rstrip": false,
298
  "single_word": false,
299
  "special": true
300
  },
301
  "50293": {
302
  "content": "<|la|>",
303
- "lstrip": false,
304
  "normalized": false,
305
- "rstrip": false,
306
  "single_word": false,
307
  "special": true
308
  },
309
  "50294": {
310
  "content": "<|mi|>",
311
- "lstrip": false,
312
  "normalized": false,
313
- "rstrip": false,
314
  "single_word": false,
315
  "special": true
316
  },
317
  "50295": {
318
  "content": "<|ml|>",
319
- "lstrip": false,
320
  "normalized": false,
321
- "rstrip": false,
322
  "single_word": false,
323
  "special": true
324
  },
325
  "50296": {
326
  "content": "<|cy|>",
327
- "lstrip": false,
328
  "normalized": false,
329
- "rstrip": false,
330
  "single_word": false,
331
  "special": true
332
  },
333
  "50297": {
334
  "content": "<|sk|>",
335
- "lstrip": false,
336
  "normalized": false,
337
- "rstrip": false,
338
  "single_word": false,
339
  "special": true
340
  },
341
  "50298": {
342
  "content": "<|te|>",
343
- "lstrip": false,
344
  "normalized": false,
345
- "rstrip": false,
346
  "single_word": false,
347
  "special": true
348
  },
349
  "50299": {
350
  "content": "<|fa|>",
351
- "lstrip": false,
352
  "normalized": false,
353
- "rstrip": false,
354
  "single_word": false,
355
  "special": true
356
  },
357
  "50300": {
358
  "content": "<|lv|>",
359
- "lstrip": false,
360
  "normalized": false,
361
- "rstrip": false,
362
  "single_word": false,
363
  "special": true
364
  },
365
  "50301": {
366
  "content": "<|bn|>",
367
- "lstrip": false,
368
  "normalized": false,
369
- "rstrip": false,
370
  "single_word": false,
371
  "special": true
372
  },
373
  "50302": {
374
  "content": "<|sr|>",
375
- "lstrip": false,
376
  "normalized": false,
377
- "rstrip": false,
378
  "single_word": false,
379
  "special": true
380
  },
381
  "50303": {
382
  "content": "<|az|>",
383
- "lstrip": false,
384
  "normalized": false,
385
- "rstrip": false,
386
  "single_word": false,
387
  "special": true
388
  },
389
  "50304": {
390
  "content": "<|sl|>",
391
- "lstrip": false,
392
  "normalized": false,
393
- "rstrip": false,
394
  "single_word": false,
395
  "special": true
396
  },
397
  "50305": {
398
  "content": "<|kn|>",
399
- "lstrip": false,
400
  "normalized": false,
401
- "rstrip": false,
402
  "single_word": false,
403
  "special": true
404
  },
405
  "50306": {
406
  "content": "<|et|>",
407
- "lstrip": false,
408
  "normalized": false,
409
- "rstrip": false,
410
  "single_word": false,
411
  "special": true
412
  },
413
  "50307": {
414
  "content": "<|mk|>",
415
- "lstrip": false,
416
  "normalized": false,
417
- "rstrip": false,
418
  "single_word": false,
419
  "special": true
420
  },
421
  "50308": {
422
  "content": "<|br|>",
423
- "lstrip": false,
424
  "normalized": false,
425
- "rstrip": false,
426
  "single_word": false,
427
  "special": true
428
  },
429
  "50309": {
430
  "content": "<|eu|>",
431
- "lstrip": false,
432
  "normalized": false,
433
- "rstrip": false,
434
  "single_word": false,
435
  "special": true
436
  },
437
  "50310": {
438
  "content": "<|is|>",
439
- "lstrip": false,
440
  "normalized": false,
441
- "rstrip": false,
442
  "single_word": false,
443
  "special": true
444
  },
445
  "50311": {
446
  "content": "<|hy|>",
447
- "lstrip": false,
448
  "normalized": false,
449
- "rstrip": false,
450
  "single_word": false,
451
  "special": true
452
  },
453
  "50312": {
454
  "content": "<|ne|>",
455
- "lstrip": false,
456
  "normalized": false,
457
- "rstrip": false,
458
  "single_word": false,
459
  "special": true
460
  },
461
  "50313": {
462
  "content": "<|mn|>",
463
- "lstrip": false,
464
  "normalized": false,
465
- "rstrip": false,
466
  "single_word": false,
467
  "special": true
468
  },
469
  "50314": {
470
  "content": "<|bs|>",
471
- "lstrip": false,
472
  "normalized": false,
473
- "rstrip": false,
474
  "single_word": false,
475
  "special": true
476
  },
477
  "50315": {
478
  "content": "<|kk|>",
479
- "lstrip": false,
480
  "normalized": false,
481
- "rstrip": false,
482
  "single_word": false,
483
  "special": true
484
  },
485
  "50316": {
486
  "content": "<|sq|>",
487
- "lstrip": false,
488
  "normalized": false,
489
- "rstrip": false,
490
  "single_word": false,
491
  "special": true
492
  },
493
  "50317": {
494
  "content": "<|sw|>",
495
- "lstrip": false,
496
  "normalized": false,
497
- "rstrip": false,
498
  "single_word": false,
499
  "special": true
500
  },
501
  "50318": {
502
  "content": "<|gl|>",
503
- "lstrip": false,
504
  "normalized": false,
505
- "rstrip": false,
506
  "single_word": false,
507
  "special": true
508
  },
509
  "50319": {
510
  "content": "<|mr|>",
511
- "lstrip": false,
512
  "normalized": false,
513
- "rstrip": false,
514
  "single_word": false,
515
  "special": true
516
  },
517
  "50320": {
518
  "content": "<|pa|>",
519
- "lstrip": false,
520
  "normalized": false,
521
- "rstrip": false,
522
  "single_word": false,
523
  "special": true
524
  },
525
  "50321": {
526
  "content": "<|si|>",
527
- "lstrip": false,
528
  "normalized": false,
529
- "rstrip": false,
530
  "single_word": false,
531
  "special": true
532
  },
533
  "50322": {
534
  "content": "<|km|>",
535
- "lstrip": false,
536
  "normalized": false,
537
- "rstrip": false,
538
  "single_word": false,
539
  "special": true
540
  },
541
  "50323": {
542
  "content": "<|sn|>",
543
- "lstrip": false,
544
  "normalized": false,
545
- "rstrip": false,
546
  "single_word": false,
547
  "special": true
548
  },
549
  "50324": {
550
  "content": "<|yo|>",
551
- "lstrip": false,
552
  "normalized": false,
553
- "rstrip": false,
554
  "single_word": false,
555
  "special": true
556
  },
557
  "50325": {
558
  "content": "<|so|>",
559
- "lstrip": false,
560
  "normalized": false,
561
- "rstrip": false,
562
  "single_word": false,
563
  "special": true
564
  },
565
  "50326": {
566
  "content": "<|af|>",
567
- "lstrip": false,
568
  "normalized": false,
569
- "rstrip": false,
570
  "single_word": false,
571
  "special": true
572
  },
573
  "50327": {
574
  "content": "<|oc|>",
575
- "lstrip": false,
576
  "normalized": false,
577
- "rstrip": false,
578
  "single_word": false,
579
  "special": true
580
  },
581
  "50328": {
582
  "content": "<|ka|>",
583
- "lstrip": false,
584
  "normalized": false,
585
- "rstrip": false,
586
  "single_word": false,
587
  "special": true
588
  },
589
  "50329": {
590
  "content": "<|be|>",
591
- "lstrip": false,
592
  "normalized": false,
593
- "rstrip": false,
594
  "single_word": false,
595
  "special": true
596
  },
597
  "50330": {
598
  "content": "<|tg|>",
599
- "lstrip": false,
600
  "normalized": false,
601
- "rstrip": false,
602
  "single_word": false,
603
  "special": true
604
  },
605
  "50331": {
606
  "content": "<|sd|>",
607
- "lstrip": false,
608
  "normalized": false,
609
- "rstrip": false,
610
  "single_word": false,
611
  "special": true
612
  },
613
  "50332": {
614
  "content": "<|gu|>",
615
- "lstrip": false,
616
  "normalized": false,
617
- "rstrip": false,
618
  "single_word": false,
619
  "special": true
620
  },
621
  "50333": {
622
  "content": "<|am|>",
623
- "lstrip": false,
624
  "normalized": false,
625
- "rstrip": false,
626
  "single_word": false,
627
  "special": true
628
  },
629
  "50334": {
630
  "content": "<|yi|>",
631
- "lstrip": false,
632
  "normalized": false,
633
- "rstrip": false,
634
  "single_word": false,
635
  "special": true
636
  },
637
  "50335": {
638
  "content": "<|lo|>",
639
- "lstrip": false,
640
  "normalized": false,
641
- "rstrip": false,
642
  "single_word": false,
643
  "special": true
644
  },
645
  "50336": {
646
  "content": "<|uz|>",
647
- "lstrip": false,
648
  "normalized": false,
649
- "rstrip": false,
650
  "single_word": false,
651
  "special": true
652
  },
653
  "50337": {
654
  "content": "<|fo|>",
655
- "lstrip": false,
656
  "normalized": false,
657
- "rstrip": false,
658
  "single_word": false,
659
  "special": true
660
  },
661
  "50338": {
662
  "content": "<|ht|>",
663
- "lstrip": false,
664
  "normalized": false,
665
- "rstrip": false,
666
  "single_word": false,
667
  "special": true
668
  },
669
  "50339": {
670
  "content": "<|ps|>",
671
- "lstrip": false,
672
  "normalized": false,
673
- "rstrip": false,
674
  "single_word": false,
675
  "special": true
676
  },
677
  "50340": {
678
  "content": "<|tk|>",
679
- "lstrip": false,
680
  "normalized": false,
681
- "rstrip": false,
682
  "single_word": false,
683
  "special": true
684
  },
685
  "50341": {
686
  "content": "<|nn|>",
687
- "lstrip": false,
688
  "normalized": false,
689
- "rstrip": false,
690
  "single_word": false,
691
  "special": true
692
  },
693
  "50342": {
694
  "content": "<|mt|>",
695
- "lstrip": false,
696
  "normalized": false,
697
- "rstrip": false,
698
  "single_word": false,
699
  "special": true
700
  },
701
  "50343": {
702
  "content": "<|sa|>",
703
- "lstrip": false,
704
  "normalized": false,
705
- "rstrip": false,
706
  "single_word": false,
707
  "special": true
708
  },
709
  "50344": {
710
  "content": "<|lb|>",
711
- "lstrip": false,
712
  "normalized": false,
713
- "rstrip": false,
714
  "single_word": false,
715
  "special": true
716
  },
717
  "50345": {
718
  "content": "<|my|>",
719
- "lstrip": false,
720
  "normalized": false,
721
- "rstrip": false,
722
  "single_word": false,
723
  "special": true
724
  },
725
  "50346": {
726
  "content": "<|bo|>",
727
- "lstrip": false,
728
  "normalized": false,
729
- "rstrip": false,
730
  "single_word": false,
731
  "special": true
732
  },
733
  "50347": {
734
  "content": "<|tl|>",
735
- "lstrip": false,
736
  "normalized": false,
737
- "rstrip": false,
738
  "single_word": false,
739
  "special": true
740
  },
741
  "50348": {
742
  "content": "<|mg|>",
743
- "lstrip": false,
744
  "normalized": false,
745
- "rstrip": false,
746
  "single_word": false,
747
  "special": true
748
  },
749
  "50349": {
750
  "content": "<|as|>",
751
- "lstrip": false,
752
  "normalized": false,
753
- "rstrip": false,
754
  "single_word": false,
755
  "special": true
756
  },
757
  "50350": {
758
  "content": "<|tt|>",
759
- "lstrip": false,
760
  "normalized": false,
761
- "rstrip": false,
762
  "single_word": false,
763
  "special": true
764
  },
765
  "50351": {
766
  "content": "<|haw|>",
767
- "lstrip": false,
768
  "normalized": false,
769
- "rstrip": false,
770
  "single_word": false,
771
  "special": true
772
  },
773
  "50352": {
774
  "content": "<|ln|>",
775
- "lstrip": false,
776
  "normalized": false,
777
- "rstrip": false,
778
  "single_word": false,
779
  "special": true
780
  },
781
  "50353": {
782
  "content": "<|ha|>",
783
- "lstrip": false,
784
  "normalized": false,
785
- "rstrip": false,
786
  "single_word": false,
787
  "special": true
788
  },
789
  "50354": {
790
  "content": "<|ba|>",
791
- "lstrip": false,
792
  "normalized": false,
793
- "rstrip": false,
794
  "single_word": false,
795
  "special": true
796
  },
797
  "50355": {
798
  "content": "<|jw|>",
799
- "lstrip": false,
800
  "normalized": false,
801
- "rstrip": false,
802
  "single_word": false,
803
  "special": true
804
  },
805
  "50356": {
806
  "content": "<|su|>",
807
- "lstrip": false,
808
  "normalized": false,
809
- "rstrip": false,
810
  "single_word": false,
811
  "special": true
812
  },
813
  "50357": {
814
  "content": "<|translate|>",
815
- "lstrip": false,
816
  "normalized": false,
817
- "rstrip": false,
818
  "single_word": false,
819
  "special": true
820
  },
821
  "50358": {
822
  "content": "<|transcribe|>",
823
- "lstrip": false,
824
  "normalized": false,
825
- "rstrip": false,
826
  "single_word": false,
827
  "special": true
828
  },
829
  "50359": {
830
  "content": "<|startoflm|>",
831
- "lstrip": false,
832
  "normalized": false,
833
- "rstrip": false,
834
  "single_word": false,
835
  "special": true
836
  },
837
  "50360": {
838
  "content": "<|startofprev|>",
839
- "lstrip": false,
840
  "normalized": false,
841
- "rstrip": false,
842
  "single_word": false,
843
  "special": true
844
  },
845
  "50361": {
846
  "content": "<|nocaptions|>",
847
- "lstrip": false,
848
  "normalized": false,
849
- "rstrip": false,
850
  "single_word": false,
851
  "special": true
852
  },
853
  "50362": {
854
  "content": "<|notimestamps|>",
855
- "lstrip": false,
856
  "normalized": false,
857
- "rstrip": false,
858
  "single_word": false,
859
  "special": true
860
  },
@@ -12984,5 +12984,7 @@
12984
  "processor_class": "WhisperProcessor",
12985
  "return_attention_mask": false,
12986
  "tokenizer_class": "WhisperTokenizer",
12987
- "unk_token": "<|endoftext|>"
 
 
12988
  }
 
12
  },
13
  "50257": {
14
  "content": "<|startoftranscript|>",
15
+ "lstrip": true,
16
  "normalized": false,
17
+ "rstrip": true,
18
  "single_word": false,
19
  "special": true
20
  },
21
  "50258": {
22
  "content": "<|en|>",
23
+ "lstrip": true,
24
  "normalized": false,
25
+ "rstrip": true,
26
  "single_word": false,
27
  "special": true
28
  },
29
  "50259": {
30
  "content": "<|zh|>",
31
+ "lstrip": true,
32
  "normalized": false,
33
+ "rstrip": true,
34
  "single_word": false,
35
  "special": true
36
  },
37
  "50260": {
38
  "content": "<|de|>",
39
+ "lstrip": true,
40
  "normalized": false,
41
+ "rstrip": true,
42
  "single_word": false,
43
  "special": true
44
  },
45
  "50261": {
46
  "content": "<|es|>",
47
+ "lstrip": true,
48
  "normalized": false,
49
+ "rstrip": true,
50
  "single_word": false,
51
  "special": true
52
  },
53
  "50262": {
54
  "content": "<|ru|>",
55
+ "lstrip": true,
56
  "normalized": false,
57
+ "rstrip": true,
58
  "single_word": false,
59
  "special": true
60
  },
61
  "50263": {
62
  "content": "<|ko|>",
63
+ "lstrip": true,
64
  "normalized": false,
65
+ "rstrip": true,
66
  "single_word": false,
67
  "special": true
68
  },
69
  "50264": {
70
  "content": "<|fr|>",
71
+ "lstrip": true,
72
  "normalized": false,
73
+ "rstrip": true,
74
  "single_word": false,
75
  "special": true
76
  },
77
  "50265": {
78
  "content": "<|ja|>",
79
+ "lstrip": true,
80
  "normalized": false,
81
+ "rstrip": true,
82
  "single_word": false,
83
  "special": true
84
  },
85
  "50266": {
86
  "content": "<|pt|>",
87
+ "lstrip": true,
88
  "normalized": false,
89
+ "rstrip": true,
90
  "single_word": false,
91
  "special": true
92
  },
93
  "50267": {
94
  "content": "<|tr|>",
95
+ "lstrip": true,
96
  "normalized": false,
97
+ "rstrip": true,
98
  "single_word": false,
99
  "special": true
100
  },
101
  "50268": {
102
  "content": "<|pl|>",
103
+ "lstrip": true,
104
  "normalized": false,
105
+ "rstrip": true,
106
  "single_word": false,
107
  "special": true
108
  },
109
  "50269": {
110
  "content": "<|ca|>",
111
+ "lstrip": true,
112
  "normalized": false,
113
+ "rstrip": true,
114
  "single_word": false,
115
  "special": true
116
  },
117
  "50270": {
118
  "content": "<|nl|>",
119
+ "lstrip": true,
120
  "normalized": false,
121
+ "rstrip": true,
122
  "single_word": false,
123
  "special": true
124
  },
125
  "50271": {
126
  "content": "<|ar|>",
127
+ "lstrip": true,
128
  "normalized": false,
129
+ "rstrip": true,
130
  "single_word": false,
131
  "special": true
132
  },
133
  "50272": {
134
  "content": "<|sv|>",
135
+ "lstrip": true,
136
  "normalized": false,
137
+ "rstrip": true,
138
  "single_word": false,
139
  "special": true
140
  },
141
  "50273": {
142
  "content": "<|it|>",
143
+ "lstrip": true,
144
  "normalized": false,
145
+ "rstrip": true,
146
  "single_word": false,
147
  "special": true
148
  },
149
  "50274": {
150
  "content": "<|id|>",
151
+ "lstrip": true,
152
  "normalized": false,
153
+ "rstrip": true,
154
  "single_word": false,
155
  "special": true
156
  },
157
  "50275": {
158
  "content": "<|hi|>",
159
+ "lstrip": true,
160
  "normalized": false,
161
+ "rstrip": true,
162
  "single_word": false,
163
  "special": true
164
  },
165
  "50276": {
166
  "content": "<|fi|>",
167
+ "lstrip": true,
168
  "normalized": false,
169
+ "rstrip": true,
170
  "single_word": false,
171
  "special": true
172
  },
173
  "50277": {
174
  "content": "<|vi|>",
175
+ "lstrip": true,
176
  "normalized": false,
177
+ "rstrip": true,
178
  "single_word": false,
179
  "special": true
180
  },
181
  "50278": {
182
  "content": "<|iw|>",
183
+ "lstrip": true,
184
  "normalized": false,
185
+ "rstrip": true,
186
  "single_word": false,
187
  "special": true
188
  },
189
  "50279": {
190
  "content": "<|uk|>",
191
+ "lstrip": true,
192
  "normalized": false,
193
+ "rstrip": true,
194
  "single_word": false,
195
  "special": true
196
  },
197
  "50280": {
198
  "content": "<|el|>",
199
+ "lstrip": true,
200
  "normalized": false,
201
+ "rstrip": true,
202
  "single_word": false,
203
  "special": true
204
  },
205
  "50281": {
206
  "content": "<|ms|>",
207
+ "lstrip": true,
208
  "normalized": false,
209
+ "rstrip": true,
210
  "single_word": false,
211
  "special": true
212
  },
213
  "50282": {
214
  "content": "<|cs|>",
215
+ "lstrip": true,
216
  "normalized": false,
217
+ "rstrip": true,
218
  "single_word": false,
219
  "special": true
220
  },
221
  "50283": {
222
  "content": "<|ro|>",
223
+ "lstrip": true,
224
  "normalized": false,
225
+ "rstrip": true,
226
  "single_word": false,
227
  "special": true
228
  },
229
  "50284": {
230
  "content": "<|da|>",
231
+ "lstrip": true,
232
  "normalized": false,
233
+ "rstrip": true,
234
  "single_word": false,
235
  "special": true
236
  },
237
  "50285": {
238
  "content": "<|hu|>",
239
+ "lstrip": true,
240
  "normalized": false,
241
+ "rstrip": true,
242
  "single_word": false,
243
  "special": true
244
  },
245
  "50286": {
246
  "content": "<|ta|>",
247
+ "lstrip": true,
248
  "normalized": false,
249
+ "rstrip": true,
250
  "single_word": false,
251
  "special": true
252
  },
253
  "50287": {
254
  "content": "<|no|>",
255
+ "lstrip": true,
256
  "normalized": false,
257
+ "rstrip": true,
258
  "single_word": false,
259
  "special": true
260
  },
261
  "50288": {
262
  "content": "<|th|>",
263
+ "lstrip": true,
264
  "normalized": false,
265
+ "rstrip": true,
266
  "single_word": false,
267
  "special": true
268
  },
269
  "50289": {
270
  "content": "<|ur|>",
271
+ "lstrip": true,
272
  "normalized": false,
273
+ "rstrip": true,
274
  "single_word": false,
275
  "special": true
276
  },
277
  "50290": {
278
  "content": "<|hr|>",
279
+ "lstrip": true,
280
  "normalized": false,
281
+ "rstrip": true,
282
  "single_word": false,
283
  "special": true
284
  },
285
  "50291": {
286
  "content": "<|bg|>",
287
+ "lstrip": true,
288
  "normalized": false,
289
+ "rstrip": true,
290
  "single_word": false,
291
  "special": true
292
  },
293
  "50292": {
294
  "content": "<|lt|>",
295
+ "lstrip": true,
296
  "normalized": false,
297
+ "rstrip": true,
298
  "single_word": false,
299
  "special": true
300
  },
301
  "50293": {
302
  "content": "<|la|>",
303
+ "lstrip": true,
304
  "normalized": false,
305
+ "rstrip": true,
306
  "single_word": false,
307
  "special": true
308
  },
309
  "50294": {
310
  "content": "<|mi|>",
311
+ "lstrip": true,
312
  "normalized": false,
313
+ "rstrip": true,
314
  "single_word": false,
315
  "special": true
316
  },
317
  "50295": {
318
  "content": "<|ml|>",
319
+ "lstrip": true,
320
  "normalized": false,
321
+ "rstrip": true,
322
  "single_word": false,
323
  "special": true
324
  },
325
  "50296": {
326
  "content": "<|cy|>",
327
+ "lstrip": true,
328
  "normalized": false,
329
+ "rstrip": true,
330
  "single_word": false,
331
  "special": true
332
  },
333
  "50297": {
334
  "content": "<|sk|>",
335
+ "lstrip": true,
336
  "normalized": false,
337
+ "rstrip": true,
338
  "single_word": false,
339
  "special": true
340
  },
341
  "50298": {
342
  "content": "<|te|>",
343
+ "lstrip": true,
344
  "normalized": false,
345
+ "rstrip": true,
346
  "single_word": false,
347
  "special": true
348
  },
349
  "50299": {
350
  "content": "<|fa|>",
351
+ "lstrip": true,
352
  "normalized": false,
353
+ "rstrip": true,
354
  "single_word": false,
355
  "special": true
356
  },
357
  "50300": {
358
  "content": "<|lv|>",
359
+ "lstrip": true,
360
  "normalized": false,
361
+ "rstrip": true,
362
  "single_word": false,
363
  "special": true
364
  },
365
  "50301": {
366
  "content": "<|bn|>",
367
+ "lstrip": true,
368
  "normalized": false,
369
+ "rstrip": true,
370
  "single_word": false,
371
  "special": true
372
  },
373
  "50302": {
374
  "content": "<|sr|>",
375
+ "lstrip": true,
376
  "normalized": false,
377
+ "rstrip": true,
378
  "single_word": false,
379
  "special": true
380
  },
381
  "50303": {
382
  "content": "<|az|>",
383
+ "lstrip": true,
384
  "normalized": false,
385
+ "rstrip": true,
386
  "single_word": false,
387
  "special": true
388
  },
389
  "50304": {
390
  "content": "<|sl|>",
391
+ "lstrip": true,
392
  "normalized": false,
393
+ "rstrip": true,
394
  "single_word": false,
395
  "special": true
396
  },
397
  "50305": {
398
  "content": "<|kn|>",
399
+ "lstrip": true,
400
  "normalized": false,
401
+ "rstrip": true,
402
  "single_word": false,
403
  "special": true
404
  },
405
  "50306": {
406
  "content": "<|et|>",
407
+ "lstrip": true,
408
  "normalized": false,
409
+ "rstrip": true,
410
  "single_word": false,
411
  "special": true
412
  },
413
  "50307": {
414
  "content": "<|mk|>",
415
+ "lstrip": true,
416
  "normalized": false,
417
+ "rstrip": true,
418
  "single_word": false,
419
  "special": true
420
  },
421
  "50308": {
422
  "content": "<|br|>",
423
+ "lstrip": true,
424
  "normalized": false,
425
+ "rstrip": true,
426
  "single_word": false,
427
  "special": true
428
  },
429
  "50309": {
430
  "content": "<|eu|>",
431
+ "lstrip": true,
432
  "normalized": false,
433
+ "rstrip": true,
434
  "single_word": false,
435
  "special": true
436
  },
437
  "50310": {
438
  "content": "<|is|>",
439
+ "lstrip": true,
440
  "normalized": false,
441
+ "rstrip": true,
442
  "single_word": false,
443
  "special": true
444
  },
445
  "50311": {
446
  "content": "<|hy|>",
447
+ "lstrip": true,
448
  "normalized": false,
449
+ "rstrip": true,
450
  "single_word": false,
451
  "special": true
452
  },
453
  "50312": {
454
  "content": "<|ne|>",
455
+ "lstrip": true,
456
  "normalized": false,
457
+ "rstrip": true,
458
  "single_word": false,
459
  "special": true
460
  },
461
  "50313": {
462
  "content": "<|mn|>",
463
+ "lstrip": true,
464
  "normalized": false,
465
+ "rstrip": true,
466
  "single_word": false,
467
  "special": true
468
  },
469
  "50314": {
470
  "content": "<|bs|>",
471
+ "lstrip": true,
472
  "normalized": false,
473
+ "rstrip": true,
474
  "single_word": false,
475
  "special": true
476
  },
477
  "50315": {
478
  "content": "<|kk|>",
479
+ "lstrip": true,
480
  "normalized": false,
481
+ "rstrip": true,
482
  "single_word": false,
483
  "special": true
484
  },
485
  "50316": {
486
  "content": "<|sq|>",
487
+ "lstrip": true,
488
  "normalized": false,
489
+ "rstrip": true,
490
  "single_word": false,
491
  "special": true
492
  },
493
  "50317": {
494
  "content": "<|sw|>",
495
+ "lstrip": true,
496
  "normalized": false,
497
+ "rstrip": true,
498
  "single_word": false,
499
  "special": true
500
  },
501
  "50318": {
502
  "content": "<|gl|>",
503
+ "lstrip": true,
504
  "normalized": false,
505
+ "rstrip": true,
506
  "single_word": false,
507
  "special": true
508
  },
509
  "50319": {
510
  "content": "<|mr|>",
511
+ "lstrip": true,
512
  "normalized": false,
513
+ "rstrip": true,
514
  "single_word": false,
515
  "special": true
516
  },
517
  "50320": {
518
  "content": "<|pa|>",
519
+ "lstrip": true,
520
  "normalized": false,
521
+ "rstrip": true,
522
  "single_word": false,
523
  "special": true
524
  },
525
  "50321": {
526
  "content": "<|si|>",
527
+ "lstrip": true,
528
  "normalized": false,
529
+ "rstrip": true,
530
  "single_word": false,
531
  "special": true
532
  },
533
  "50322": {
534
  "content": "<|km|>",
535
+ "lstrip": true,
536
  "normalized": false,
537
+ "rstrip": true,
538
  "single_word": false,
539
  "special": true
540
  },
541
  "50323": {
542
  "content": "<|sn|>",
543
+ "lstrip": true,
544
  "normalized": false,
545
+ "rstrip": true,
546
  "single_word": false,
547
  "special": true
548
  },
549
  "50324": {
550
  "content": "<|yo|>",
551
+ "lstrip": true,
552
  "normalized": false,
553
+ "rstrip": true,
554
  "single_word": false,
555
  "special": true
556
  },
557
  "50325": {
558
  "content": "<|so|>",
559
+ "lstrip": true,
560
  "normalized": false,
561
+ "rstrip": true,
562
  "single_word": false,
563
  "special": true
564
  },
565
  "50326": {
566
  "content": "<|af|>",
567
+ "lstrip": true,
568
  "normalized": false,
569
+ "rstrip": true,
570
  "single_word": false,
571
  "special": true
572
  },
573
  "50327": {
574
  "content": "<|oc|>",
575
+ "lstrip": true,
576
  "normalized": false,
577
+ "rstrip": true,
578
  "single_word": false,
579
  "special": true
580
  },
581
  "50328": {
582
  "content": "<|ka|>",
583
+ "lstrip": true,
584
  "normalized": false,
585
+ "rstrip": true,
586
  "single_word": false,
587
  "special": true
588
  },
589
  "50329": {
590
  "content": "<|be|>",
591
+ "lstrip": true,
592
  "normalized": false,
593
+ "rstrip": true,
594
  "single_word": false,
595
  "special": true
596
  },
597
  "50330": {
598
  "content": "<|tg|>",
599
+ "lstrip": true,
600
  "normalized": false,
601
+ "rstrip": true,
602
  "single_word": false,
603
  "special": true
604
  },
605
  "50331": {
606
  "content": "<|sd|>",
607
+ "lstrip": true,
608
  "normalized": false,
609
+ "rstrip": true,
610
  "single_word": false,
611
  "special": true
612
  },
613
  "50332": {
614
  "content": "<|gu|>",
615
+ "lstrip": true,
616
  "normalized": false,
617
+ "rstrip": true,
618
  "single_word": false,
619
  "special": true
620
  },
621
  "50333": {
622
  "content": "<|am|>",
623
+ "lstrip": true,
624
  "normalized": false,
625
+ "rstrip": true,
626
  "single_word": false,
627
  "special": true
628
  },
629
  "50334": {
630
  "content": "<|yi|>",
631
+ "lstrip": true,
632
  "normalized": false,
633
+ "rstrip": true,
634
  "single_word": false,
635
  "special": true
636
  },
637
  "50335": {
638
  "content": "<|lo|>",
639
+ "lstrip": true,
640
  "normalized": false,
641
+ "rstrip": true,
642
  "single_word": false,
643
  "special": true
644
  },
645
  "50336": {
646
  "content": "<|uz|>",
647
+ "lstrip": true,
648
  "normalized": false,
649
+ "rstrip": true,
650
  "single_word": false,
651
  "special": true
652
  },
653
  "50337": {
654
  "content": "<|fo|>",
655
+ "lstrip": true,
656
  "normalized": false,
657
+ "rstrip": true,
658
  "single_word": false,
659
  "special": true
660
  },
661
  "50338": {
662
  "content": "<|ht|>",
663
+ "lstrip": true,
664
  "normalized": false,
665
+ "rstrip": true,
666
  "single_word": false,
667
  "special": true
668
  },
669
  "50339": {
670
  "content": "<|ps|>",
671
+ "lstrip": true,
672
  "normalized": false,
673
+ "rstrip": true,
674
  "single_word": false,
675
  "special": true
676
  },
677
  "50340": {
678
  "content": "<|tk|>",
679
+ "lstrip": true,
680
  "normalized": false,
681
+ "rstrip": true,
682
  "single_word": false,
683
  "special": true
684
  },
685
  "50341": {
686
  "content": "<|nn|>",
687
+ "lstrip": true,
688
  "normalized": false,
689
+ "rstrip": true,
690
  "single_word": false,
691
  "special": true
692
  },
693
  "50342": {
694
  "content": "<|mt|>",
695
+ "lstrip": true,
696
  "normalized": false,
697
+ "rstrip": true,
698
  "single_word": false,
699
  "special": true
700
  },
701
  "50343": {
702
  "content": "<|sa|>",
703
+ "lstrip": true,
704
  "normalized": false,
705
+ "rstrip": true,
706
  "single_word": false,
707
  "special": true
708
  },
709
  "50344": {
710
  "content": "<|lb|>",
711
+ "lstrip": true,
712
  "normalized": false,
713
+ "rstrip": true,
714
  "single_word": false,
715
  "special": true
716
  },
717
  "50345": {
718
  "content": "<|my|>",
719
+ "lstrip": true,
720
  "normalized": false,
721
+ "rstrip": true,
722
  "single_word": false,
723
  "special": true
724
  },
725
  "50346": {
726
  "content": "<|bo|>",
727
+ "lstrip": true,
728
  "normalized": false,
729
+ "rstrip": true,
730
  "single_word": false,
731
  "special": true
732
  },
733
  "50347": {
734
  "content": "<|tl|>",
735
+ "lstrip": true,
736
  "normalized": false,
737
+ "rstrip": true,
738
  "single_word": false,
739
  "special": true
740
  },
741
  "50348": {
742
  "content": "<|mg|>",
743
+ "lstrip": true,
744
  "normalized": false,
745
+ "rstrip": true,
746
  "single_word": false,
747
  "special": true
748
  },
749
  "50349": {
750
  "content": "<|as|>",
751
+ "lstrip": true,
752
  "normalized": false,
753
+ "rstrip": true,
754
  "single_word": false,
755
  "special": true
756
  },
757
  "50350": {
758
  "content": "<|tt|>",
759
+ "lstrip": true,
760
  "normalized": false,
761
+ "rstrip": true,
762
  "single_word": false,
763
  "special": true
764
  },
765
  "50351": {
766
  "content": "<|haw|>",
767
+ "lstrip": true,
768
  "normalized": false,
769
+ "rstrip": true,
770
  "single_word": false,
771
  "special": true
772
  },
773
  "50352": {
774
  "content": "<|ln|>",
775
+ "lstrip": true,
776
  "normalized": false,
777
+ "rstrip": true,
778
  "single_word": false,
779
  "special": true
780
  },
781
  "50353": {
782
  "content": "<|ha|>",
783
+ "lstrip": true,
784
  "normalized": false,
785
+ "rstrip": true,
786
  "single_word": false,
787
  "special": true
788
  },
789
  "50354": {
790
  "content": "<|ba|>",
791
+ "lstrip": true,
792
  "normalized": false,
793
+ "rstrip": true,
794
  "single_word": false,
795
  "special": true
796
  },
797
  "50355": {
798
  "content": "<|jw|>",
799
+ "lstrip": true,
800
  "normalized": false,
801
+ "rstrip": true,
802
  "single_word": false,
803
  "special": true
804
  },
805
  "50356": {
806
  "content": "<|su|>",
807
+ "lstrip": true,
808
  "normalized": false,
809
+ "rstrip": true,
810
  "single_word": false,
811
  "special": true
812
  },
813
  "50357": {
814
  "content": "<|translate|>",
815
+ "lstrip": true,
816
  "normalized": false,
817
+ "rstrip": true,
818
  "single_word": false,
819
  "special": true
820
  },
821
  "50358": {
822
  "content": "<|transcribe|>",
823
+ "lstrip": true,
824
  "normalized": false,
825
+ "rstrip": true,
826
  "single_word": false,
827
  "special": true
828
  },
829
  "50359": {
830
  "content": "<|startoflm|>",
831
+ "lstrip": true,
832
  "normalized": false,
833
+ "rstrip": true,
834
  "single_word": false,
835
  "special": true
836
  },
837
  "50360": {
838
  "content": "<|startofprev|>",
839
+ "lstrip": true,
840
  "normalized": false,
841
+ "rstrip": true,
842
  "single_word": false,
843
  "special": true
844
  },
845
  "50361": {
846
  "content": "<|nocaptions|>",
847
+ "lstrip": true,
848
  "normalized": false,
849
+ "rstrip": true,
850
  "single_word": false,
851
  "special": true
852
  },
853
  "50362": {
854
  "content": "<|notimestamps|>",
855
+ "lstrip": true,
856
  "normalized": false,
857
+ "rstrip": true,
858
  "single_word": false,
859
  "special": true
860
  },
 
12984
  "processor_class": "WhisperProcessor",
12985
  "return_attention_mask": false,
12986
  "tokenizer_class": "WhisperTokenizer",
12987
+ "trust_remote_code": false,
12988
+ "unk_token": "<|endoftext|>",
12989
+ "use_fast": true
12990
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a66d3dfabdf98db8af67029e73c37d215a9505c1a2897141496593bf76d4adef
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89348f52306010c9939628e9f2b8015be02a830a896a5bb8e2b3c342cca6f734
3
  size 5304