marinone94 commited on
Commit
e4220c8
1 Parent(s): d559445

End of training

Browse files
all_results.json CHANGED
@@ -1,33 +1,33 @@
1
  {
2
  "epoch": 2.33,
3
  "eval_loss": 1.6191972494125366,
4
- "eval_pretrained_loss": 1.715580940246582,
5
- "eval_pretrained_runtime": 131.9468,
6
- "eval_pretrained_samples_per_second": 2.501,
7
- "eval_pretrained_steps_per_second": 0.318,
8
- "eval_pretrained_wer": 264.5217946670924,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
- "test_finetuned_loss": 1.0018435716629028,
14
- "test_finetuned_runtime": 238.9736,
15
- "test_finetuned_samples_per_second": 3.176,
16
- "test_finetuned_steps_per_second": 0.398,
17
- "test_finetuned_wer": 173.15939719843325,
18
  "test_loss": 1.7568330764770508,
19
  "test_pretrained_loss": 1.724961757659912,
20
- "test_pretrained_runtime": 275.9851,
21
- "test_pretrained_samples_per_second": 2.75,
22
- "test_pretrained_steps_per_second": 0.344,
23
  "test_pretrained_wer": 261.9066587001262,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
  "total_flos": 1.7572960198656e+17,
29
- "train_loss": 0.8751795228038516,
30
- "train_runtime": 2133.32,
31
- "train_samples_per_second": 3.36,
32
  "train_steps_per_second": 0.21
33
  }
 
1
  {
2
  "epoch": 2.33,
3
  "eval_loss": 1.6191972494125366,
4
+ "eval_pretrained_loss": 1.7155802249908447,
5
+ "eval_pretrained_runtime": 130.3147,
6
+ "eval_pretrained_samples_per_second": 2.532,
7
+ "eval_pretrained_steps_per_second": 0.322,
8
+ "eval_pretrained_wer": 264.42599393262014,
9
  "eval_runtime": 56.3363,
10
  "eval_samples_per_second": 0.071,
11
  "eval_steps_per_second": 0.036,
12
  "eval_wer": 153.2258064516129,
13
+ "test_finetuned_loss": 1.0018900632858276,
14
+ "test_finetuned_runtime": 230.5902,
15
+ "test_finetuned_samples_per_second": 3.292,
16
+ "test_finetuned_steps_per_second": 0.412,
17
+ "test_finetuned_wer": 172.070636659364,
18
  "test_loss": 1.7568330764770508,
19
  "test_pretrained_loss": 1.724961757659912,
20
+ "test_pretrained_runtime": 272.5213,
21
+ "test_pretrained_samples_per_second": 2.785,
22
+ "test_pretrained_steps_per_second": 0.349,
23
  "test_pretrained_wer": 261.9066587001262,
24
  "test_runtime": 37.8582,
25
  "test_samples_per_second": 0.106,
26
  "test_steps_per_second": 0.053,
27
  "test_wer": 138.5964912280702,
28
  "total_flos": 1.7572960198656e+17,
29
+ "train_loss": 0.8751970188958305,
30
+ "train_runtime": 2136.6213,
31
+ "train_samples_per_second": 3.355,
32
  "train_steps_per_second": 0.21
33
  }
config.json CHANGED
@@ -139,6 +139,8 @@
139
  49870,
140
  50254,
141
  50258,
 
 
142
  50360,
143
  50361,
144
  50362
 
139
  49870,
140
  50254,
141
  50258,
142
+ 50358,
143
+ 50359,
144
  50360,
145
  50361,
146
  50362
eval_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "eval_pretrained_loss": 1.715580940246582,
3
- "eval_pretrained_runtime": 131.9468,
4
- "eval_pretrained_samples_per_second": 2.501,
5
- "eval_pretrained_steps_per_second": 0.318,
6
- "eval_pretrained_wer": 264.5217946670924
7
  }
 
1
  {
2
+ "eval_pretrained_loss": 1.7155802249908447,
3
+ "eval_pretrained_runtime": 130.3147,
4
+ "eval_pretrained_samples_per_second": 2.532,
5
+ "eval_pretrained_steps_per_second": 0.322,
6
+ "eval_pretrained_wer": 264.42599393262014
7
  }
generation_config.json CHANGED
@@ -211,6 +211,8 @@
211
  49870,
212
  50254,
213
  50258,
 
 
214
  50360,
215
  50361,
216
  50362
 
211
  49870,
212
  50254,
213
  50258,
214
+ 50358,
215
+ 50359,
216
  50360,
217
  50361,
218
  50362
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68bb0c59266f7c595eaaa4c27443c5b5bde3796bdbf17a2e1ef41723e9e02297
3
  size 151098921
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8451cf9e0f3b232b59003dac8b3033e1d7ef4a8bf8b4bd868cd1aa95b092f45
3
  size 151098921
test_finetuned_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.33,
3
- "test_finetuned_loss": 1.0018435716629028,
4
- "test_finetuned_runtime": 238.9736,
5
- "test_finetuned_samples_per_second": 3.176,
6
- "test_finetuned_steps_per_second": 0.398,
7
- "test_finetuned_wer": 173.15939719843325
8
  }
 
1
  {
2
  "epoch": 2.33,
3
+ "test_finetuned_loss": 1.0018900632858276,
4
+ "test_finetuned_runtime": 230.5902,
5
+ "test_finetuned_samples_per_second": 3.292,
6
+ "test_finetuned_steps_per_second": 0.412,
7
+ "test_finetuned_wer": 172.070636659364
8
  }
test_pretrained_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "test_pretrained_loss": 1.724961757659912,
3
- "test_pretrained_runtime": 275.9851,
4
- "test_pretrained_samples_per_second": 2.75,
5
- "test_pretrained_steps_per_second": 0.344,
6
  "test_pretrained_wer": 261.9066587001262
7
  }
 
1
  {
2
  "test_pretrained_loss": 1.724961757659912,
3
+ "test_pretrained_runtime": 272.5213,
4
+ "test_pretrained_samples_per_second": 2.785,
5
+ "test_pretrained_steps_per_second": 0.349,
6
  "test_pretrained_wer": 261.9066587001262
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.33,
3
  "total_flos": 1.7572960198656e+17,
4
- "train_loss": 0.8751795228038516,
5
- "train_runtime": 2133.32,
6
- "train_samples_per_second": 3.36,
7
  "train_steps_per_second": 0.21
8
  }
 
1
  {
2
  "epoch": 2.33,
3
  "total_flos": 1.7572960198656e+17,
4
+ "train_loss": 0.8751970188958305,
5
+ "train_runtime": 2136.6213,
6
+ "train_samples_per_second": 3.355,
7
  "train_steps_per_second": 0.21
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 173.47916334025228,
3
  "best_model_checkpoint": "./whisper-training-blog/checkpoint-396",
4
  "epoch": 2.330357142857143,
5
  "global_step": 448,
@@ -16,7 +16,7 @@
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 3.3333333333333335e-07,
19
- "loss": 1.6808,
20
  "step": 8
21
  },
22
  {
@@ -34,7 +34,7 @@
34
  {
35
  "epoch": 0.04,
36
  "learning_rate": 1e-06,
37
- "loss": 1.609,
38
  "step": 20
39
  },
40
  {
@@ -46,13 +46,13 @@
46
  {
47
  "epoch": 0.06,
48
  "learning_rate": 1.4444444444444445e-06,
49
- "loss": 1.608,
50
  "step": 28
51
  },
52
  {
53
  "epoch": 0.07,
54
  "learning_rate": 1.6666666666666667e-06,
55
- "loss": 1.5075,
56
  "step": 32
57
  },
58
  {
@@ -75,17 +75,17 @@
75
  },
76
  {
77
  "epoch": 0.1,
78
- "eval_loss": 1.4919109344482422,
79
- "eval_runtime": 118.4455,
80
- "eval_samples_per_second": 2.786,
81
- "eval_steps_per_second": 0.355,
82
- "eval_wer": 245.29778061631805,
83
  "step": 44
84
  },
85
  {
86
  "epoch": 0.11,
87
  "learning_rate": 2.5555555555555557e-06,
88
- "loss": 1.3192,
89
  "step": 48
90
  },
91
  {
@@ -97,7 +97,7 @@
97
  {
98
  "epoch": 0.12,
99
  "learning_rate": 3e-06,
100
- "loss": 1.2895,
101
  "step": 56
102
  },
103
  {
@@ -109,7 +109,7 @@
109
  {
110
  "epoch": 0.14,
111
  "learning_rate": 3.4444444444444444e-06,
112
- "loss": 1.3856,
113
  "step": 64
114
  },
115
  {
@@ -127,52 +127,52 @@
127
  {
128
  "epoch": 0.17,
129
  "learning_rate": 4.111111111111111e-06,
130
- "loss": 1.1661,
131
  "step": 76
132
  },
133
  {
134
  "epoch": 0.18,
135
  "learning_rate": 4.333333333333333e-06,
136
- "loss": 1.1351,
137
  "step": 80
138
  },
139
  {
140
  "epoch": 0.19,
141
  "learning_rate": 4.555555555555555e-06,
142
- "loss": 1.0729,
143
  "step": 84
144
  },
145
  {
146
  "epoch": 0.2,
147
  "learning_rate": 4.777777777777778e-06,
148
- "loss": 1.0501,
149
  "step": 88
150
  },
151
  {
152
  "epoch": 0.2,
153
- "eval_loss": 1.225467324256897,
154
- "eval_runtime": 119.277,
155
- "eval_samples_per_second": 2.767,
156
- "eval_steps_per_second": 0.352,
157
- "eval_wer": 219.94251955931662,
158
  "step": 88
159
  },
160
  {
161
  "epoch": 0.21,
162
  "learning_rate": 4.9999999999999996e-06,
163
- "loss": 1.1278,
164
  "step": 92
165
  },
166
  {
167
  "epoch": 0.21,
168
  "learning_rate": 5.2222222222222226e-06,
169
- "loss": 1.0563,
170
  "step": 96
171
  },
172
  {
173
  "epoch": 0.22,
174
  "learning_rate": 5.444444444444445e-06,
175
- "loss": 0.9929,
176
  "step": 100
177
  },
178
  {
@@ -196,13 +196,13 @@
196
  {
197
  "epoch": 0.26,
198
  "learning_rate": 6.333333333333333e-06,
199
- "loss": 0.9688,
200
  "step": 116
201
  },
202
  {
203
  "epoch": 0.27,
204
  "learning_rate": 6.555555555555556e-06,
205
- "loss": 0.9559,
206
  "step": 120
207
  },
208
  {
@@ -225,29 +225,29 @@
225
  },
226
  {
227
  "epoch": 0.29,
228
- "eval_loss": 1.120314359664917,
229
- "eval_runtime": 111.0003,
230
- "eval_samples_per_second": 2.973,
231
- "eval_steps_per_second": 0.378,
232
- "eval_wer": 205.7799776464953,
233
  "step": 132
234
  },
235
  {
236
  "epoch": 0.3,
237
  "learning_rate": 7.444444444444444e-06,
238
- "loss": 0.9171,
239
  "step": 136
240
  },
241
  {
242
  "epoch": 0.31,
243
  "learning_rate": 7.428115015974441e-06,
244
- "loss": 0.9445,
245
  "step": 140
246
  },
247
  {
248
  "epoch": 0.32,
249
  "learning_rate": 7.332268370607029e-06,
250
- "loss": 0.8926,
251
  "step": 144
252
  },
253
  {
@@ -259,7 +259,7 @@
259
  {
260
  "epoch": 1.0,
261
  "learning_rate": 7.140575079872205e-06,
262
- "loss": 1.239,
263
  "step": 152
264
  },
265
  {
@@ -271,13 +271,13 @@
271
  {
272
  "epoch": 1.02,
273
  "learning_rate": 6.948881789137381e-06,
274
- "loss": 1.0117,
275
  "step": 160
276
  },
277
  {
278
  "epoch": 1.03,
279
  "learning_rate": 6.853035143769968e-06,
280
- "loss": 0.9081,
281
  "step": 164
282
  },
283
  {
@@ -289,22 +289,22 @@
289
  {
290
  "epoch": 1.05,
291
  "learning_rate": 6.6613418530351436e-06,
292
- "loss": 0.8672,
293
  "step": 172
294
  },
295
  {
296
  "epoch": 1.06,
297
  "learning_rate": 6.565495207667732e-06,
298
- "loss": 0.8142,
299
  "step": 176
300
  },
301
  {
302
  "epoch": 1.06,
303
- "eval_loss": 1.0674585103988647,
304
- "eval_runtime": 102.4925,
305
- "eval_samples_per_second": 3.22,
306
- "eval_steps_per_second": 0.41,
307
- "eval_wer": 192.87881207089254,
308
  "step": 176
309
  },
310
  {
@@ -328,25 +328,25 @@
328
  {
329
  "epoch": 1.09,
330
  "learning_rate": 6.182108626198084e-06,
331
- "loss": 0.821,
332
  "step": 192
333
  },
334
  {
335
  "epoch": 1.1,
336
  "learning_rate": 6.086261980830671e-06,
337
- "loss": 0.8518,
338
  "step": 196
339
  },
340
  {
341
  "epoch": 1.11,
342
  "learning_rate": 5.990415335463259e-06,
343
- "loss": 0.7813,
344
  "step": 200
345
  },
346
  {
347
  "epoch": 1.12,
348
  "learning_rate": 5.894568690095847e-06,
349
- "loss": 0.7541,
350
  "step": 204
351
  },
352
  {
@@ -358,13 +358,13 @@
358
  {
359
  "epoch": 1.14,
360
  "learning_rate": 5.702875399361023e-06,
361
- "loss": 0.8182,
362
  "step": 212
363
  },
364
  {
365
  "epoch": 1.15,
366
  "learning_rate": 5.607028753993611e-06,
367
- "loss": 0.7461,
368
  "step": 216
369
  },
370
  {
@@ -375,11 +375,11 @@
375
  },
376
  {
377
  "epoch": 1.16,
378
- "eval_loss": 1.0393497943878174,
379
- "eval_runtime": 101.6759,
380
- "eval_samples_per_second": 3.246,
381
- "eval_steps_per_second": 0.413,
382
- "eval_wer": 178.42886795465432,
383
  "step": 220
384
  },
385
  {
@@ -391,13 +391,13 @@
391
  {
392
  "epoch": 1.17,
393
  "learning_rate": 5.319488817891374e-06,
394
- "loss": 0.7662,
395
  "step": 228
396
  },
397
  {
398
  "epoch": 1.18,
399
  "learning_rate": 5.223642172523962e-06,
400
- "loss": 0.7253,
401
  "step": 232
402
  },
403
  {
@@ -415,7 +415,7 @@
415
  {
416
  "epoch": 1.21,
417
  "learning_rate": 4.936102236421725e-06,
418
- "loss": 0.7841,
419
  "step": 244
420
  },
421
  {
@@ -427,13 +427,13 @@
427
  {
428
  "epoch": 1.23,
429
  "learning_rate": 4.744408945686901e-06,
430
- "loss": 0.7077,
431
  "step": 252
432
  },
433
  {
434
  "epoch": 1.24,
435
  "learning_rate": 4.648562300319489e-06,
436
- "loss": 0.7812,
437
  "step": 256
438
  },
439
  {
@@ -450,11 +450,11 @@
450
  },
451
  {
452
  "epoch": 1.25,
453
- "eval_loss": 1.0302220582962036,
454
- "eval_runtime": 114.7389,
455
- "eval_samples_per_second": 2.876,
456
- "eval_steps_per_second": 0.366,
457
- "eval_wer": 216.6054606418649,
458
  "step": 264
459
  },
460
  {
@@ -466,19 +466,19 @@
466
  {
467
  "epoch": 1.27,
468
  "learning_rate": 4.26517571884984e-06,
469
- "loss": 0.7187,
470
  "step": 272
471
  },
472
  {
473
  "epoch": 1.28,
474
  "learning_rate": 4.169329073482428e-06,
475
- "loss": 0.6699,
476
  "step": 276
477
  },
478
  {
479
  "epoch": 1.29,
480
  "learning_rate": 4.0734824281150155e-06,
481
- "loss": 0.5677,
482
  "step": 280
483
  },
484
  {
@@ -508,7 +508,7 @@
508
  {
509
  "epoch": 1.33,
510
  "learning_rate": 3.5942492012779555e-06,
511
- "loss": 0.8766,
512
  "step": 300
513
  },
514
  {
@@ -520,34 +520,34 @@
520
  {
521
  "epoch": 2.02,
522
  "learning_rate": 3.4025559105431313e-06,
523
- "loss": 0.6971,
524
  "step": 308
525
  },
526
  {
527
  "epoch": 2.02,
528
- "eval_loss": 1.0134836435317993,
529
- "eval_runtime": 101.0358,
530
- "eval_samples_per_second": 3.266,
531
- "eval_steps_per_second": 0.416,
532
- "eval_wer": 179.3709085102986,
533
  "step": 308
534
  },
535
  {
536
  "epoch": 2.03,
537
  "learning_rate": 3.306709265175719e-06,
538
- "loss": 0.7432,
539
  "step": 312
540
  },
541
  {
542
  "epoch": 2.04,
543
  "learning_rate": 3.2108626198083067e-06,
544
- "loss": 0.6264,
545
  "step": 316
546
  },
547
  {
548
  "epoch": 2.04,
549
  "learning_rate": 3.1150159744408946e-06,
550
- "loss": 0.6604,
551
  "step": 320
552
  },
553
  {
@@ -559,13 +559,13 @@
559
  {
560
  "epoch": 2.06,
561
  "learning_rate": 2.9233226837060704e-06,
562
- "loss": 0.6539,
563
  "step": 328
564
  },
565
  {
566
  "epoch": 2.07,
567
  "learning_rate": 2.8274760383386583e-06,
568
- "loss": 0.611,
569
  "step": 332
570
  },
571
  {
@@ -583,28 +583,28 @@
583
  {
584
  "epoch": 2.1,
585
  "learning_rate": 2.539936102236422e-06,
586
- "loss": 0.6642,
587
  "step": 344
588
  },
589
  {
590
  "epoch": 2.11,
591
  "learning_rate": 2.44408945686901e-06,
592
- "loss": 0.6147,
593
  "step": 348
594
  },
595
  {
596
  "epoch": 2.12,
597
  "learning_rate": 2.3482428115015974e-06,
598
- "loss": 0.6051,
599
  "step": 352
600
  },
601
  {
602
  "epoch": 2.12,
603
- "eval_loss": 1.0064767599105835,
604
- "eval_runtime": 107.523,
605
- "eval_samples_per_second": 3.069,
606
- "eval_steps_per_second": 0.391,
607
- "eval_wer": 194.63515886955133,
608
  "step": 352
609
  },
610
  {
@@ -628,7 +628,7 @@
628
  {
629
  "epoch": 2.15,
630
  "learning_rate": 1.964856230031949e-06,
631
- "loss": 0.6012,
632
  "step": 368
633
  },
634
  {
@@ -640,19 +640,19 @@
640
  {
641
  "epoch": 2.17,
642
  "learning_rate": 1.7731629392971245e-06,
643
- "loss": 0.6288,
644
  "step": 376
645
  },
646
  {
647
  "epoch": 2.18,
648
  "learning_rate": 1.6773162939297124e-06,
649
- "loss": 0.6079,
650
  "step": 380
651
  },
652
  {
653
  "epoch": 2.19,
654
  "learning_rate": 1.5814696485623003e-06,
655
- "loss": 0.5887,
656
  "step": 384
657
  },
658
  {
@@ -670,22 +670,22 @@
670
  {
671
  "epoch": 2.21,
672
  "learning_rate": 1.2939297124600638e-06,
673
- "loss": 0.6048,
674
  "step": 396
675
  },
676
  {
677
  "epoch": 2.21,
678
- "eval_loss": 1.002966284751892,
679
- "eval_runtime": 97.8102,
680
- "eval_samples_per_second": 3.374,
681
- "eval_steps_per_second": 0.429,
682
- "eval_wer": 173.47916334025228,
683
  "step": 396
684
  },
685
  {
686
  "epoch": 2.22,
687
  "learning_rate": 1.1980830670926517e-06,
688
- "loss": 0.5774,
689
  "step": 400
690
  },
691
  {
@@ -703,13 +703,13 @@
703
  {
704
  "epoch": 2.25,
705
  "learning_rate": 9.105431309904153e-07,
706
- "loss": 0.5618,
707
  "step": 412
708
  },
709
  {
710
  "epoch": 2.26,
711
  "learning_rate": 8.146964856230032e-07,
712
- "loss": 0.5729,
713
  "step": 416
714
  },
715
  {
@@ -733,7 +733,7 @@
733
  {
734
  "epoch": 2.29,
735
  "learning_rate": 4.313099041533546e-07,
736
- "loss": 0.5544,
737
  "step": 432
738
  },
739
  {
@@ -745,16 +745,16 @@
745
  {
746
  "epoch": 2.31,
747
  "learning_rate": 2.3961661341853033e-07,
748
- "loss": 0.585,
749
  "step": 440
750
  },
751
  {
752
  "epoch": 2.31,
753
- "eval_loss": 1.0049320459365845,
754
- "eval_runtime": 105.5439,
755
- "eval_samples_per_second": 3.127,
756
- "eval_steps_per_second": 0.398,
757
- "eval_wer": 186.6677311192719,
758
  "step": 440
759
  },
760
  {
@@ -773,9 +773,9 @@
773
  "epoch": 2.33,
774
  "step": 448,
775
  "total_flos": 1.7572960198656e+17,
776
- "train_loss": 0.8751795228038516,
777
- "train_runtime": 2133.32,
778
- "train_samples_per_second": 3.36,
779
  "train_steps_per_second": 0.21
780
  }
781
  ],
 
1
  {
2
+ "best_metric": 160.91330033530258,
3
  "best_model_checkpoint": "./whisper-training-blog/checkpoint-396",
4
  "epoch": 2.330357142857143,
5
  "global_step": 448,
 
16
  {
17
  "epoch": 0.02,
18
  "learning_rate": 3.3333333333333335e-07,
19
+ "loss": 1.6809,
20
  "step": 8
21
  },
22
  {
 
34
  {
35
  "epoch": 0.04,
36
  "learning_rate": 1e-06,
37
+ "loss": 1.6091,
38
  "step": 20
39
  },
40
  {
 
46
  {
47
  "epoch": 0.06,
48
  "learning_rate": 1.4444444444444445e-06,
49
+ "loss": 1.6081,
50
  "step": 28
51
  },
52
  {
53
  "epoch": 0.07,
54
  "learning_rate": 1.6666666666666667e-06,
55
+ "loss": 1.5076,
56
  "step": 32
57
  },
58
  {
 
75
  },
76
  {
77
  "epoch": 0.1,
78
+ "eval_loss": 1.4919402599334717,
79
+ "eval_runtime": 119.3982,
80
+ "eval_samples_per_second": 2.764,
81
+ "eval_steps_per_second": 0.352,
82
+ "eval_wer": 245.34568098355422,
83
  "step": 44
84
  },
85
  {
86
  "epoch": 0.11,
87
  "learning_rate": 2.5555555555555557e-06,
88
+ "loss": 1.3193,
89
  "step": 48
90
  },
91
  {
 
97
  {
98
  "epoch": 0.12,
99
  "learning_rate": 3e-06,
100
+ "loss": 1.2896,
101
  "step": 56
102
  },
103
  {
 
109
  {
110
  "epoch": 0.14,
111
  "learning_rate": 3.4444444444444444e-06,
112
+ "loss": 1.3855,
113
  "step": 64
114
  },
115
  {
 
127
  {
128
  "epoch": 0.17,
129
  "learning_rate": 4.111111111111111e-06,
130
+ "loss": 1.1663,
131
  "step": 76
132
  },
133
  {
134
  "epoch": 0.18,
135
  "learning_rate": 4.333333333333333e-06,
136
+ "loss": 1.1352,
137
  "step": 80
138
  },
139
  {
140
  "epoch": 0.19,
141
  "learning_rate": 4.555555555555555e-06,
142
+ "loss": 1.0731,
143
  "step": 84
144
  },
145
  {
146
  "epoch": 0.2,
147
  "learning_rate": 4.777777777777778e-06,
148
+ "loss": 1.0502,
149
  "step": 88
150
  },
151
  {
152
  "epoch": 0.2,
153
+ "eval_loss": 1.2254914045333862,
154
+ "eval_runtime": 123.9853,
155
+ "eval_samples_per_second": 2.662,
156
+ "eval_steps_per_second": 0.339,
157
+ "eval_wer": 220.1500878173399,
158
  "step": 88
159
  },
160
  {
161
  "epoch": 0.21,
162
  "learning_rate": 4.9999999999999996e-06,
163
+ "loss": 1.128,
164
  "step": 92
165
  },
166
  {
167
  "epoch": 0.21,
168
  "learning_rate": 5.2222222222222226e-06,
169
+ "loss": 1.0564,
170
  "step": 96
171
  },
172
  {
173
  "epoch": 0.22,
174
  "learning_rate": 5.444444444444445e-06,
175
+ "loss": 0.993,
176
  "step": 100
177
  },
178
  {
 
196
  {
197
  "epoch": 0.26,
198
  "learning_rate": 6.333333333333333e-06,
199
+ "loss": 0.9687,
200
  "step": 116
201
  },
202
  {
203
  "epoch": 0.27,
204
  "learning_rate": 6.555555555555556e-06,
205
+ "loss": 0.9558,
206
  "step": 120
207
  },
208
  {
 
225
  },
226
  {
227
  "epoch": 0.29,
228
+ "eval_loss": 1.1202749013900757,
229
+ "eval_runtime": 108.7054,
230
+ "eval_samples_per_second": 3.036,
231
+ "eval_steps_per_second": 0.386,
232
+ "eval_wer": 206.24301452977804,
233
  "step": 132
234
  },
235
  {
236
  "epoch": 0.3,
237
  "learning_rate": 7.444444444444444e-06,
238
+ "loss": 0.917,
239
  "step": 136
240
  },
241
  {
242
  "epoch": 0.31,
243
  "learning_rate": 7.428115015974441e-06,
244
+ "loss": 0.9444,
245
  "step": 140
246
  },
247
  {
248
  "epoch": 0.32,
249
  "learning_rate": 7.332268370607029e-06,
250
+ "loss": 0.8927,
251
  "step": 144
252
  },
253
  {
 
259
  {
260
  "epoch": 1.0,
261
  "learning_rate": 7.140575079872205e-06,
262
+ "loss": 1.2391,
263
  "step": 152
264
  },
265
  {
 
271
  {
272
  "epoch": 1.02,
273
  "learning_rate": 6.948881789137381e-06,
274
+ "loss": 1.0116,
275
  "step": 160
276
  },
277
  {
278
  "epoch": 1.03,
279
  "learning_rate": 6.853035143769968e-06,
280
+ "loss": 0.9082,
281
  "step": 164
282
  },
283
  {
 
289
  {
290
  "epoch": 1.05,
291
  "learning_rate": 6.6613418530351436e-06,
292
+ "loss": 0.8671,
293
  "step": 172
294
  },
295
  {
296
  "epoch": 1.06,
297
  "learning_rate": 6.565495207667732e-06,
298
+ "loss": 0.8141,
299
  "step": 176
300
  },
301
  {
302
  "epoch": 1.06,
303
+ "eval_loss": 1.067484736442566,
304
+ "eval_runtime": 108.347,
305
+ "eval_samples_per_second": 3.046,
306
+ "eval_steps_per_second": 0.388,
307
+ "eval_wer": 201.96391505668208,
308
  "step": 176
309
  },
310
  {
 
328
  {
329
  "epoch": 1.09,
330
  "learning_rate": 6.182108626198084e-06,
331
+ "loss": 0.8211,
332
  "step": 192
333
  },
334
  {
335
  "epoch": 1.1,
336
  "learning_rate": 6.086261980830671e-06,
337
+ "loss": 0.8517,
338
  "step": 196
339
  },
340
  {
341
  "epoch": 1.11,
342
  "learning_rate": 5.990415335463259e-06,
343
+ "loss": 0.7814,
344
  "step": 200
345
  },
346
  {
347
  "epoch": 1.12,
348
  "learning_rate": 5.894568690095847e-06,
349
+ "loss": 0.7542,
350
  "step": 204
351
  },
352
  {
 
358
  {
359
  "epoch": 1.14,
360
  "learning_rate": 5.702875399361023e-06,
361
+ "loss": 0.8181,
362
  "step": 212
363
  },
364
  {
365
  "epoch": 1.15,
366
  "learning_rate": 5.607028753993611e-06,
367
+ "loss": 0.7462,
368
  "step": 216
369
  },
370
  {
 
375
  },
376
  {
377
  "epoch": 1.16,
378
+ "eval_loss": 1.0393874645233154,
379
+ "eval_runtime": 100.1655,
380
+ "eval_samples_per_second": 3.295,
381
+ "eval_steps_per_second": 0.419,
382
+ "eval_wer": 178.36500079833945,
383
  "step": 220
384
  },
385
  {
 
391
  {
392
  "epoch": 1.17,
393
  "learning_rate": 5.319488817891374e-06,
394
+ "loss": 0.7663,
395
  "step": 228
396
  },
397
  {
398
  "epoch": 1.18,
399
  "learning_rate": 5.223642172523962e-06,
400
+ "loss": 0.7254,
401
  "step": 232
402
  },
403
  {
 
415
  {
416
  "epoch": 1.21,
417
  "learning_rate": 4.936102236421725e-06,
418
+ "loss": 0.7842,
419
  "step": 244
420
  },
421
  {
 
427
  {
428
  "epoch": 1.23,
429
  "learning_rate": 4.744408945686901e-06,
430
+ "loss": 0.7075,
431
  "step": 252
432
  },
433
  {
434
  "epoch": 1.24,
435
  "learning_rate": 4.648562300319489e-06,
436
+ "loss": 0.7811,
437
  "step": 256
438
  },
439
  {
 
450
  },
451
  {
452
  "epoch": 1.25,
453
+ "eval_loss": 1.030145525932312,
454
+ "eval_runtime": 114.3297,
455
+ "eval_samples_per_second": 2.886,
456
+ "eval_steps_per_second": 0.367,
457
+ "eval_wer": 221.2996966310075,
458
  "step": 264
459
  },
460
  {
 
466
  {
467
  "epoch": 1.27,
468
  "learning_rate": 4.26517571884984e-06,
469
+ "loss": 0.7186,
470
  "step": 272
471
  },
472
  {
473
  "epoch": 1.28,
474
  "learning_rate": 4.169329073482428e-06,
475
+ "loss": 0.6698,
476
  "step": 276
477
  },
478
  {
479
  "epoch": 1.29,
480
  "learning_rate": 4.0734824281150155e-06,
481
+ "loss": 0.5678,
482
  "step": 280
483
  },
484
  {
 
508
  {
509
  "epoch": 1.33,
510
  "learning_rate": 3.5942492012779555e-06,
511
+ "loss": 0.8765,
512
  "step": 300
513
  },
514
  {
 
520
  {
521
  "epoch": 2.02,
522
  "learning_rate": 3.4025559105431313e-06,
523
+ "loss": 0.6972,
524
  "step": 308
525
  },
526
  {
527
  "epoch": 2.02,
528
+ "eval_loss": 1.0134402513504028,
529
+ "eval_runtime": 102.1008,
530
+ "eval_samples_per_second": 3.232,
531
+ "eval_steps_per_second": 0.411,
532
+ "eval_wer": 176.67252115599553,
533
  "step": 308
534
  },
535
  {
536
  "epoch": 2.03,
537
  "learning_rate": 3.306709265175719e-06,
538
+ "loss": 0.7431,
539
  "step": 312
540
  },
541
  {
542
  "epoch": 2.04,
543
  "learning_rate": 3.2108626198083067e-06,
544
+ "loss": 0.6265,
545
  "step": 316
546
  },
547
  {
548
  "epoch": 2.04,
549
  "learning_rate": 3.1150159744408946e-06,
550
+ "loss": 0.6606,
551
  "step": 320
552
  },
553
  {
 
559
  {
560
  "epoch": 2.06,
561
  "learning_rate": 2.9233226837060704e-06,
562
+ "loss": 0.654,
563
  "step": 328
564
  },
565
  {
566
  "epoch": 2.07,
567
  "learning_rate": 2.8274760383386583e-06,
568
+ "loss": 0.6111,
569
  "step": 332
570
  },
571
  {
 
583
  {
584
  "epoch": 2.1,
585
  "learning_rate": 2.539936102236422e-06,
586
+ "loss": 0.6643,
587
  "step": 344
588
  },
589
  {
590
  "epoch": 2.11,
591
  "learning_rate": 2.44408945686901e-06,
592
+ "loss": 0.6146,
593
  "step": 348
594
  },
595
  {
596
  "epoch": 2.12,
597
  "learning_rate": 2.3482428115015974e-06,
598
+ "loss": 0.6052,
599
  "step": 352
600
  },
601
  {
602
  "epoch": 2.12,
603
+ "eval_loss": 1.006484866142273,
604
+ "eval_runtime": 105.4228,
605
+ "eval_samples_per_second": 3.13,
606
+ "eval_steps_per_second": 0.398,
607
+ "eval_wer": 194.7149928149449,
608
  "step": 352
609
  },
610
  {
 
628
  {
629
  "epoch": 2.15,
630
  "learning_rate": 1.964856230031949e-06,
631
+ "loss": 0.6017,
632
  "step": 368
633
  },
634
  {
 
640
  {
641
  "epoch": 2.17,
642
  "learning_rate": 1.7731629392971245e-06,
643
+ "loss": 0.6289,
644
  "step": 376
645
  },
646
  {
647
  "epoch": 2.18,
648
  "learning_rate": 1.6773162939297124e-06,
649
+ "loss": 0.608,
650
  "step": 380
651
  },
652
  {
653
  "epoch": 2.19,
654
  "learning_rate": 1.5814696485623003e-06,
655
+ "loss": 0.5888,
656
  "step": 384
657
  },
658
  {
 
670
  {
671
  "epoch": 2.21,
672
  "learning_rate": 1.2939297124600638e-06,
673
+ "loss": 0.6047,
674
  "step": 396
675
  },
676
  {
677
  "epoch": 2.21,
678
+ "eval_loss": 1.002995491027832,
679
+ "eval_runtime": 93.4309,
680
+ "eval_samples_per_second": 3.532,
681
+ "eval_steps_per_second": 0.45,
682
+ "eval_wer": 160.91330033530258,
683
  "step": 396
684
  },
685
  {
686
  "epoch": 2.22,
687
  "learning_rate": 1.1980830670926517e-06,
688
+ "loss": 0.5772,
689
  "step": 400
690
  },
691
  {
 
703
  {
704
  "epoch": 2.25,
705
  "learning_rate": 9.105431309904153e-07,
706
+ "loss": 0.5617,
707
  "step": 412
708
  },
709
  {
710
  "epoch": 2.26,
711
  "learning_rate": 8.146964856230032e-07,
712
+ "loss": 0.5728,
713
  "step": 416
714
  },
715
  {
 
733
  {
734
  "epoch": 2.29,
735
  "learning_rate": 4.313099041533546e-07,
736
+ "loss": 0.5543,
737
  "step": 432
738
  },
739
  {
 
745
  {
746
  "epoch": 2.31,
747
  "learning_rate": 2.3961661341853033e-07,
748
+ "loss": 0.5849,
749
  "step": 440
750
  },
751
  {
752
  "epoch": 2.31,
753
+ "eval_loss": 1.0050277709960938,
754
+ "eval_runtime": 101.6428,
755
+ "eval_samples_per_second": 3.247,
756
+ "eval_steps_per_second": 0.413,
757
+ "eval_wer": 180.05748044068338,
758
  "step": 440
759
  },
760
  {
 
773
  "epoch": 2.33,
774
  "step": 448,
775
  "total_flos": 1.7572960198656e+17,
776
+ "train_loss": 0.8751970188958305,
777
+ "train_runtime": 2136.6213,
778
+ "train_samples_per_second": 3.355,
779
  "train_steps_per_second": 0.21
780
  }
781
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:098eb422614f2b69cada5c370a7aafa45f3e0897c25ca997abbd0fbb3b31385a
3
  size 3707
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fb52f407e48481c35659717bd5c261226233eadb5d658c9543eba6275568644
3
  size 3707