xezpeleta commited on
Commit
868f2a0
1 Parent(s): c005abf

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 2.18,
3
- "eval_loss": 0.23755376040935516,
4
- "eval_runtime": 1799.3473,
5
- "eval_samples_per_second": 3.663,
6
- "eval_steps_per_second": 0.458,
7
- "eval_wer": 14.119648426424725,
8
- "train_loss": 0.191678307980299,
9
- "train_runtime": 39548.417,
10
- "train_samples_per_second": 0.809,
11
- "train_steps_per_second": 0.202
12
  }
 
1
  {
2
  "epoch": 2.18,
3
+ "eval_loss": 0.22874309122562408,
4
+ "eval_runtime": 1773.1655,
5
+ "eval_samples_per_second": 3.717,
6
+ "eval_steps_per_second": 0.465,
7
+ "eval_wer": 12.839726193851513,
8
+ "train_loss": 0.18796414549276233,
9
+ "train_runtime": 39400.0429,
10
+ "train_samples_per_second": 0.812,
11
+ "train_steps_per_second": 0.203
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.18,
3
- "eval_loss": 0.23755376040935516,
4
- "eval_runtime": 1799.3473,
5
- "eval_samples_per_second": 3.663,
6
- "eval_steps_per_second": 0.458,
7
- "eval_wer": 14.119648426424725
8
  }
 
1
  {
2
  "epoch": 2.18,
3
+ "eval_loss": 0.22874309122562408,
4
+ "eval_runtime": 1773.1655,
5
+ "eval_samples_per_second": 3.717,
6
+ "eval_steps_per_second": 0.465,
7
+ "eval_wer": 12.839726193851513
8
  }
runs/Jul24_13-06-26_tknadmin-System-Product-Name/events.out.tfevents.1690241884.tknadmin-System-Product-Name.553148.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58f6b28fa624911307371dd3e26204b38d1fa42b29d3283bf94f5f0501670f4
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 2.18,
3
- "train_loss": 0.191678307980299,
4
- "train_runtime": 39548.417,
5
- "train_samples_per_second": 0.809,
6
- "train_steps_per_second": 0.202
7
  }
 
1
  {
2
  "epoch": 2.18,
3
+ "train_loss": 0.18796414549276233,
4
+ "train_runtime": 39400.0429,
5
+ "train_samples_per_second": 0.812,
6
+ "train_steps_per_second": 0.203
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 14.119648426424725,
3
- "best_model_checkpoint": "./checkpoint-8000",
4
  "epoch": 2.176,
5
  "global_step": 8000,
6
  "is_hyper_param_search": false,
@@ -9,2076 +9,2076 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.0,
12
- "learning_rate": 2.76e-07,
13
- "loss": 1.4248,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
- "learning_rate": 5.64e-07,
19
- "loss": 1.3686,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
- "learning_rate": 8.64e-07,
25
- "loss": 1.1529,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.01,
30
- "learning_rate": 1.164e-06,
31
- "loss": 0.8748,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.02,
36
- "learning_rate": 1.464e-06,
37
- "loss": 0.7645,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.02,
42
- "learning_rate": 1.764e-06,
43
- "loss": 0.71,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.02,
48
- "learning_rate": 2.064e-06,
49
- "loss": 0.7174,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.03,
54
- "learning_rate": 2.364e-06,
55
- "loss": 0.6369,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.03,
60
- "learning_rate": 2.6640000000000002e-06,
61
- "loss": 0.5867,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 0.03,
66
- "learning_rate": 2.964e-06,
67
- "loss": 0.6348,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 0.03,
72
- "learning_rate": 3.2640000000000004e-06,
73
- "loss": 0.6222,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.04,
78
- "learning_rate": 3.564e-06,
79
- "loss": 0.5447,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 0.04,
84
- "learning_rate": 3.864000000000001e-06,
85
- "loss": 0.5093,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 0.04,
90
- "learning_rate": 4.1639999999999994e-06,
91
- "loss": 0.5134,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.05,
96
- "learning_rate": 4.464e-06,
97
- "loss": 0.4913,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 0.05,
102
- "learning_rate": 4.7640000000000005e-06,
103
- "loss": 0.5007,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 0.05,
108
- "learning_rate": 5.064e-06,
109
- "loss": 0.4233,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.06,
114
- "learning_rate": 5.364000000000001e-06,
115
- "loss": 0.3944,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 0.06,
120
- "learning_rate": 5.6639999999999995e-06,
121
- "loss": 0.4615,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 0.06,
126
- "learning_rate": 5.964e-06,
127
- "loss": 0.443,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 0.06,
132
- "eval_loss": 0.5036891102790833,
133
- "eval_runtime": 1812.9009,
134
- "eval_samples_per_second": 3.636,
135
- "eval_steps_per_second": 0.455,
136
- "eval_wer": 37.42962452914254,
137
  "step": 500
138
  },
139
  {
140
  "epoch": 0.07,
141
- "learning_rate": 5.9824e-06,
142
- "loss": 0.458,
143
  "step": 525
144
  },
145
  {
146
  "epoch": 0.07,
147
- "learning_rate": 5.962400000000001e-06,
148
- "loss": 0.4264,
149
  "step": 550
150
  },
151
  {
152
  "epoch": 0.07,
153
- "learning_rate": 5.9424e-06,
154
- "loss": 0.4405,
155
  "step": 575
156
  },
157
  {
158
  "epoch": 0.07,
159
- "learning_rate": 5.9224e-06,
160
- "loss": 0.4451,
161
  "step": 600
162
  },
163
  {
164
  "epoch": 0.08,
165
- "learning_rate": 5.9024000000000004e-06,
166
- "loss": 0.416,
167
  "step": 625
168
  },
169
  {
170
  "epoch": 0.08,
171
- "learning_rate": 5.882400000000001e-06,
172
- "loss": 0.4193,
173
  "step": 650
174
  },
175
  {
176
  "epoch": 0.08,
177
- "learning_rate": 5.8624e-06,
178
- "loss": 0.4029,
179
  "step": 675
180
  },
181
  {
182
  "epoch": 0.09,
183
- "learning_rate": 5.8424e-06,
184
- "loss": 0.4028,
185
  "step": 700
186
  },
187
  {
188
  "epoch": 0.09,
189
- "learning_rate": 5.8224e-06,
190
- "loss": 0.3613,
191
  "step": 725
192
  },
193
  {
194
  "epoch": 0.09,
195
- "learning_rate": 5.8024e-06,
196
- "loss": 0.3995,
197
  "step": 750
198
  },
199
  {
200
  "epoch": 0.1,
201
- "learning_rate": 5.7824e-06,
202
- "loss": 0.3865,
203
  "step": 775
204
  },
205
  {
206
  "epoch": 0.1,
207
- "learning_rate": 5.7624e-06,
208
- "loss": 0.3534,
209
  "step": 800
210
  },
211
  {
212
  "epoch": 0.1,
213
- "learning_rate": 5.7424e-06,
214
- "loss": 0.3741,
215
  "step": 825
216
  },
217
  {
218
  "epoch": 0.11,
219
- "learning_rate": 5.7224000000000005e-06,
220
- "loss": 0.4243,
221
  "step": 850
222
  },
223
  {
224
  "epoch": 0.11,
225
- "learning_rate": 5.702400000000001e-06,
226
- "loss": 0.387,
227
  "step": 875
228
  },
229
  {
230
  "epoch": 0.11,
231
- "learning_rate": 5.6824e-06,
232
- "loss": 0.3726,
233
  "step": 900
234
  },
235
  {
236
  "epoch": 0.12,
237
- "learning_rate": 5.6624e-06,
238
- "loss": 0.4141,
239
  "step": 925
240
  },
241
  {
242
  "epoch": 0.12,
243
- "learning_rate": 5.6424e-06,
244
- "loss": 0.3553,
245
  "step": 950
246
  },
247
  {
248
  "epoch": 0.12,
249
- "learning_rate": 5.622400000000001e-06,
250
- "loss": 0.4188,
251
  "step": 975
252
  },
253
  {
254
  "epoch": 0.12,
255
- "learning_rate": 5.6024e-06,
256
- "loss": 0.4196,
257
  "step": 1000
258
  },
259
  {
260
  "epoch": 0.12,
261
- "eval_loss": 0.40096473693847656,
262
- "eval_runtime": 1827.7488,
263
- "eval_samples_per_second": 3.606,
264
- "eval_steps_per_second": 0.451,
265
- "eval_wer": 28.913686257037547,
266
  "step": 1000
267
  },
268
  {
269
  "epoch": 0.13,
270
- "learning_rate": 5.5824e-06,
271
- "loss": 0.4134,
272
  "step": 1025
273
  },
274
  {
275
  "epoch": 0.13,
276
- "learning_rate": 5.5624e-06,
277
- "loss": 0.3777,
278
  "step": 1050
279
  },
280
  {
281
  "epoch": 0.13,
282
- "learning_rate": 5.5424e-06,
283
- "loss": 0.3565,
284
  "step": 1075
285
  },
286
  {
287
  "epoch": 0.14,
288
- "learning_rate": 5.5224e-06,
289
- "loss": 0.3878,
290
  "step": 1100
291
  },
292
  {
293
  "epoch": 0.14,
294
- "learning_rate": 5.5024e-06,
295
- "loss": 0.3691,
296
  "step": 1125
297
  },
298
  {
299
  "epoch": 0.14,
300
- "learning_rate": 5.4824e-06,
301
- "loss": 0.3647,
302
  "step": 1150
303
  },
304
  {
305
  "epoch": 0.15,
306
- "learning_rate": 5.4624e-06,
307
- "loss": 0.3352,
308
  "step": 1175
309
  },
310
  {
311
  "epoch": 0.15,
312
- "learning_rate": 5.442400000000001e-06,
313
- "loss": 0.3047,
314
  "step": 1200
315
  },
316
  {
317
  "epoch": 0.15,
318
- "learning_rate": 5.422400000000001e-06,
319
- "loss": 0.2753,
320
  "step": 1225
321
  },
322
  {
323
  "epoch": 0.16,
324
- "learning_rate": 5.4024e-06,
325
- "loss": 0.3258,
326
  "step": 1250
327
  },
328
  {
329
  "epoch": 0.16,
330
- "learning_rate": 5.3824e-06,
331
- "loss": 0.3405,
332
  "step": 1275
333
  },
334
  {
335
  "epoch": 0.16,
336
- "learning_rate": 5.3624000000000005e-06,
337
- "loss": 0.3316,
338
  "step": 1300
339
  },
340
  {
341
  "epoch": 0.17,
342
- "learning_rate": 5.3424e-06,
343
- "loss": 0.3187,
344
  "step": 1325
345
  },
346
  {
347
  "epoch": 0.17,
348
- "learning_rate": 5.3224e-06,
349
- "loss": 0.2521,
350
  "step": 1350
351
  },
352
  {
353
  "epoch": 0.17,
354
- "learning_rate": 5.3024e-06,
355
- "loss": 0.3445,
356
  "step": 1375
357
  },
358
  {
359
  "epoch": 0.17,
360
- "learning_rate": 5.2824e-06,
361
- "loss": 0.2715,
362
  "step": 1400
363
  },
364
  {
365
  "epoch": 0.18,
366
- "learning_rate": 5.2624e-06,
367
- "loss": 0.2951,
368
  "step": 1425
369
  },
370
  {
371
  "epoch": 0.18,
372
- "learning_rate": 5.2424e-06,
373
- "loss": 0.3858,
374
  "step": 1450
375
  },
376
  {
377
  "epoch": 0.18,
378
- "learning_rate": 5.2224e-06,
379
  "loss": 0.3253,
380
  "step": 1475
381
  },
382
  {
383
  "epoch": 0.19,
384
- "learning_rate": 5.2024e-06,
385
- "loss": 0.2823,
386
  "step": 1500
387
  },
388
  {
389
  "epoch": 0.19,
390
- "eval_loss": 0.3452778458595276,
391
- "eval_runtime": 1811.7894,
392
- "eval_samples_per_second": 3.638,
393
- "eval_steps_per_second": 0.455,
394
- "eval_wer": 24.685082425371625,
395
  "step": 1500
396
  },
397
  {
398
  "epoch": 0.19,
399
- "learning_rate": 5.1824000000000006e-06,
400
- "loss": 0.2764,
401
  "step": 1525
402
  },
403
  {
404
  "epoch": 0.19,
405
- "learning_rate": 5.162400000000001e-06,
406
- "loss": 0.2853,
407
  "step": 1550
408
  },
409
  {
410
  "epoch": 0.2,
411
- "learning_rate": 5.1424e-06,
412
- "loss": 0.353,
413
  "step": 1575
414
  },
415
  {
416
  "epoch": 0.2,
417
- "learning_rate": 5.1224e-06,
418
- "loss": 0.3318,
419
  "step": 1600
420
  },
421
  {
422
  "epoch": 0.2,
423
- "learning_rate": 5.1024000000000005e-06,
424
- "loss": 0.3729,
425
  "step": 1625
426
  },
427
  {
428
  "epoch": 0.21,
429
- "learning_rate": 5.0824e-06,
430
- "loss": 0.3314,
431
  "step": 1650
432
  },
433
  {
434
  "epoch": 0.21,
435
- "learning_rate": 5.0624e-06,
436
- "loss": 0.3193,
437
  "step": 1675
438
  },
439
  {
440
  "epoch": 0.21,
441
- "learning_rate": 5.0424e-06,
442
- "loss": 0.2881,
443
  "step": 1700
444
  },
445
  {
446
  "epoch": 0.22,
447
- "learning_rate": 5.0223999999999996e-06,
448
- "loss": 0.2961,
449
  "step": 1725
450
  },
451
  {
452
  "epoch": 0.22,
453
- "learning_rate": 5.0024e-06,
454
- "loss": 0.3286,
455
  "step": 1750
456
  },
457
  {
458
  "epoch": 0.22,
459
- "learning_rate": 4.9824e-06,
460
- "loss": 0.3192,
461
  "step": 1775
462
  },
463
  {
464
  "epoch": 0.23,
465
- "learning_rate": 4.9624e-06,
466
- "loss": 0.3183,
467
  "step": 1800
468
  },
469
  {
470
  "epoch": 0.23,
471
- "learning_rate": 4.9424e-06,
472
  "loss": 0.2706,
473
  "step": 1825
474
  },
475
  {
476
  "epoch": 0.23,
477
- "learning_rate": 4.9224000000000005e-06,
478
- "loss": 0.3048,
479
  "step": 1850
480
  },
481
  {
482
  "epoch": 0.23,
483
- "learning_rate": 4.902400000000001e-06,
484
- "loss": 0.2524,
485
  "step": 1875
486
  },
487
  {
488
  "epoch": 0.24,
489
- "learning_rate": 4.8824e-06,
490
- "loss": 0.2882,
491
  "step": 1900
492
  },
493
  {
494
  "epoch": 0.24,
495
- "learning_rate": 4.8624e-06,
496
- "loss": 0.2466,
497
  "step": 1925
498
  },
499
  {
500
  "epoch": 0.24,
501
- "learning_rate": 4.8424000000000004e-06,
502
- "loss": 0.1995,
503
  "step": 1950
504
  },
505
  {
506
  "epoch": 0.25,
507
- "learning_rate": 4.8224e-06,
508
- "loss": 0.2497,
509
  "step": 1975
510
  },
511
  {
512
  "epoch": 0.25,
513
- "learning_rate": 4.8024e-06,
514
- "loss": 0.2551,
515
  "step": 2000
516
  },
517
  {
518
  "epoch": 0.25,
519
- "eval_loss": 0.3163716495037079,
520
- "eval_runtime": 1819.6056,
521
- "eval_samples_per_second": 3.622,
522
- "eval_steps_per_second": 0.453,
523
- "eval_wer": 22.57888128316254,
524
  "step": 2000
525
  },
526
  {
527
  "epoch": 0.25,
528
- "learning_rate": 4.7824e-06,
529
- "loss": 0.2133,
530
  "step": 2025
531
  },
532
  {
533
  "epoch": 0.26,
534
- "learning_rate": 4.7623999999999995e-06,
535
- "loss": 0.2474,
536
  "step": 2050
537
  },
538
  {
539
  "epoch": 0.26,
540
- "learning_rate": 4.7424e-06,
541
- "loss": 0.2302,
542
  "step": 2075
543
  },
544
  {
545
  "epoch": 0.26,
546
- "learning_rate": 4.7232e-06,
547
- "loss": 0.3048,
548
  "step": 2100
549
  },
550
  {
551
  "epoch": 0.27,
552
- "learning_rate": 4.7032e-06,
553
- "loss": 0.2593,
554
  "step": 2125
555
  },
556
  {
557
  "epoch": 0.27,
558
- "learning_rate": 4.6832e-06,
559
- "loss": 0.2605,
560
  "step": 2150
561
  },
562
  {
563
  "epoch": 0.27,
564
- "learning_rate": 4.6632000000000005e-06,
565
- "loss": 0.2847,
566
  "step": 2175
567
  },
568
  {
569
  "epoch": 0.28,
570
- "learning_rate": 4.643200000000001e-06,
571
- "loss": 0.2519,
572
  "step": 2200
573
  },
574
  {
575
  "epoch": 0.28,
576
- "learning_rate": 4.6232e-06,
577
- "loss": 0.2576,
578
  "step": 2225
579
  },
580
  {
581
  "epoch": 0.28,
582
- "learning_rate": 4.6032e-06,
583
- "loss": 0.2261,
584
  "step": 2250
585
  },
586
  {
587
  "epoch": 0.28,
588
- "learning_rate": 4.5832000000000004e-06,
589
- "loss": 0.2777,
590
  "step": 2275
591
  },
592
  {
593
  "epoch": 0.29,
594
- "learning_rate": 4.5632e-06,
595
- "loss": 0.2586,
596
  "step": 2300
597
  },
598
  {
599
  "epoch": 0.29,
600
- "learning_rate": 4.5432e-06,
601
- "loss": 0.2482,
602
  "step": 2325
603
  },
604
  {
605
  "epoch": 0.29,
606
- "learning_rate": 4.5232e-06,
607
- "loss": 0.2068,
608
  "step": 2350
609
  },
610
  {
611
  "epoch": 0.3,
612
- "learning_rate": 4.5032e-06,
613
- "loss": 0.2333,
614
  "step": 2375
615
  },
616
  {
617
  "epoch": 0.3,
618
- "learning_rate": 4.4832e-06,
619
- "loss": 0.2451,
620
  "step": 2400
621
  },
622
  {
623
  "epoch": 0.3,
624
- "learning_rate": 4.4632e-06,
625
- "loss": 0.2199,
626
  "step": 2425
627
  },
628
  {
629
  "epoch": 0.31,
630
- "learning_rate": 4.4432e-06,
631
- "loss": 0.2519,
632
  "step": 2450
633
  },
634
  {
635
  "epoch": 0.31,
636
- "learning_rate": 4.4232e-06,
637
- "loss": 0.2314,
638
  "step": 2475
639
  },
640
  {
641
  "epoch": 0.31,
642
- "learning_rate": 4.4032000000000005e-06,
643
- "loss": 0.206,
644
  "step": 2500
645
  },
646
  {
647
  "epoch": 0.31,
648
- "eval_loss": 0.29017817974090576,
649
- "eval_runtime": 1826.6674,
650
- "eval_samples_per_second": 3.608,
651
- "eval_steps_per_second": 0.451,
652
- "eval_wer": 19.79221515654745,
653
  "step": 2500
654
  },
655
  {
656
  "epoch": 0.32,
657
- "learning_rate": 4.383200000000001e-06,
658
- "loss": 0.2336,
659
  "step": 2525
660
  },
661
  {
662
  "epoch": 0.32,
663
- "learning_rate": 4.3632e-06,
664
- "loss": 0.255,
665
  "step": 2550
666
  },
667
  {
668
  "epoch": 0.32,
669
- "learning_rate": 4.3432e-06,
670
- "loss": 0.2523,
671
  "step": 2575
672
  },
673
  {
674
  "epoch": 0.33,
675
- "learning_rate": 4.3232e-06,
676
- "loss": 0.2916,
677
  "step": 2600
678
  },
679
  {
680
  "epoch": 0.33,
681
- "learning_rate": 4.3032e-06,
682
- "loss": 0.1934,
683
  "step": 2625
684
  },
685
  {
686
  "epoch": 0.33,
687
- "learning_rate": 4.2832e-06,
688
- "loss": 0.245,
689
  "step": 2650
690
  },
691
  {
692
  "epoch": 0.33,
693
- "learning_rate": 4.2632e-06,
694
- "loss": 0.2059,
695
  "step": 2675
696
  },
697
  {
698
  "epoch": 0.34,
699
- "learning_rate": 4.2432e-06,
700
- "loss": 0.2197,
701
  "step": 2700
702
  },
703
  {
704
  "epoch": 0.34,
705
- "learning_rate": 4.2232e-06,
706
- "loss": 0.2108,
707
  "step": 2725
708
  },
709
  {
710
  "epoch": 0.34,
711
- "learning_rate": 4.2032e-06,
712
- "loss": 0.2485,
713
  "step": 2750
714
  },
715
  {
716
  "epoch": 0.35,
717
- "learning_rate": 4.1832e-06,
718
- "loss": 0.201,
719
  "step": 2775
720
  },
721
  {
722
  "epoch": 0.35,
723
- "learning_rate": 4.1632e-06,
724
- "loss": 0.2343,
725
  "step": 2800
726
  },
727
  {
728
  "epoch": 0.35,
729
- "learning_rate": 4.1432e-06,
730
- "loss": 0.2088,
731
  "step": 2825
732
  },
733
  {
734
  "epoch": 0.36,
735
- "learning_rate": 4.123200000000001e-06,
736
  "loss": 0.2294,
737
  "step": 2850
738
  },
739
  {
740
  "epoch": 0.36,
741
- "learning_rate": 4.1032e-06,
742
- "loss": 0.2226,
743
  "step": 2875
744
  },
745
  {
746
  "epoch": 0.36,
747
- "learning_rate": 4.0832e-06,
748
- "loss": 0.1993,
749
  "step": 2900
750
  },
751
  {
752
  "epoch": 0.37,
753
- "learning_rate": 4.0632e-06,
754
- "loss": 0.2006,
755
  "step": 2925
756
  },
757
  {
758
  "epoch": 0.37,
759
- "learning_rate": 4.0432e-06,
760
- "loss": 0.2385,
761
  "step": 2950
762
  },
763
  {
764
  "epoch": 0.37,
765
- "learning_rate": 4.0232e-06,
766
- "loss": 0.2225,
767
  "step": 2975
768
  },
769
  {
770
  "epoch": 0.38,
771
- "learning_rate": 4.0032e-06,
772
- "loss": 0.2327,
773
  "step": 3000
774
  },
775
  {
776
  "epoch": 0.38,
777
- "eval_loss": 0.2706596553325653,
778
- "eval_runtime": 1809.8842,
779
- "eval_samples_per_second": 3.642,
780
- "eval_steps_per_second": 0.455,
781
- "eval_wer": 18.935558345822027,
782
  "step": 3000
783
  },
784
  {
785
  "epoch": 0.38,
786
- "learning_rate": 3.9832e-06,
787
- "loss": 0.1988,
788
  "step": 3025
789
  },
790
  {
791
  "epoch": 0.38,
792
- "learning_rate": 3.9632e-06,
793
- "loss": 0.2117,
794
  "step": 3050
795
  },
796
  {
797
  "epoch": 0.38,
798
- "learning_rate": 3.9432e-06,
799
- "loss": 0.1887,
800
  "step": 3075
801
  },
802
  {
803
  "epoch": 0.39,
804
- "learning_rate": 3.9232e-06,
805
- "loss": 0.1975,
806
  "step": 3100
807
  },
808
  {
809
  "epoch": 0.39,
810
- "learning_rate": 3.9032e-06,
811
- "loss": 0.2138,
812
  "step": 3125
813
  },
814
  {
815
  "epoch": 0.39,
816
- "learning_rate": 3.8832e-06,
817
- "loss": 0.2082,
818
  "step": 3150
819
  },
820
  {
821
  "epoch": 0.4,
822
- "learning_rate": 3.8632000000000006e-06,
823
- "loss": 0.2365,
824
  "step": 3175
825
  },
826
  {
827
  "epoch": 0.4,
828
- "learning_rate": 3.8432e-06,
829
- "loss": 0.1919,
830
  "step": 3200
831
  },
832
  {
833
  "epoch": 0.4,
834
- "learning_rate": 3.8232e-06,
835
- "loss": 0.2334,
836
  "step": 3225
837
  },
838
  {
839
  "epoch": 0.41,
840
- "learning_rate": 3.8032000000000003e-06,
841
- "loss": 0.2743,
842
  "step": 3250
843
  },
844
  {
845
  "epoch": 0.41,
846
- "learning_rate": 3.7831999999999996e-06,
847
- "loss": 0.1992,
848
  "step": 3275
849
  },
850
  {
851
  "epoch": 1.0,
852
- "learning_rate": 3.7632e-06,
853
- "loss": 0.236,
854
  "step": 3300
855
  },
856
  {
857
  "epoch": 1.0,
858
- "learning_rate": 3.7432e-06,
859
- "loss": 0.2389,
860
  "step": 3325
861
  },
862
  {
863
  "epoch": 1.01,
864
- "learning_rate": 3.7232e-06,
865
- "loss": 0.2236,
866
  "step": 3350
867
  },
868
  {
869
  "epoch": 1.01,
870
- "learning_rate": 3.7032e-06,
871
- "loss": 0.2187,
872
  "step": 3375
873
  },
874
  {
875
  "epoch": 1.01,
876
- "learning_rate": 3.6832e-06,
877
- "loss": 0.2022,
878
  "step": 3400
879
  },
880
  {
881
  "epoch": 1.02,
882
- "learning_rate": 3.6632000000000004e-06,
883
- "loss": 0.1943,
884
  "step": 3425
885
  },
886
  {
887
  "epoch": 1.02,
888
- "learning_rate": 3.6431999999999997e-06,
889
- "loss": 0.1858,
890
  "step": 3450
891
  },
892
  {
893
  "epoch": 1.02,
894
- "learning_rate": 3.6232e-06,
895
- "loss": 0.1762,
896
  "step": 3475
897
  },
898
  {
899
  "epoch": 1.03,
900
- "learning_rate": 3.6032e-06,
901
- "loss": 0.1416,
902
  "step": 3500
903
  },
904
  {
905
  "epoch": 1.03,
906
- "eval_loss": 0.25662127137184143,
907
- "eval_runtime": 1825.0473,
908
- "eval_samples_per_second": 3.611,
909
- "eval_steps_per_second": 0.451,
910
- "eval_wer": 17.69208959455628,
911
  "step": 3500
912
  },
913
  {
914
  "epoch": 1.03,
915
- "learning_rate": 3.5832e-06,
916
- "loss": 0.1777,
917
  "step": 3525
918
  },
919
  {
920
  "epoch": 1.03,
921
- "learning_rate": 3.5632e-06,
922
- "loss": 0.1934,
923
  "step": 3550
924
  },
925
  {
926
  "epoch": 1.03,
927
- "learning_rate": 3.5432000000000002e-06,
928
- "loss": 0.1566,
929
  "step": 3575
930
  },
931
  {
932
  "epoch": 1.04,
933
- "learning_rate": 3.5232000000000004e-06,
934
- "loss": 0.1635,
935
  "step": 3600
936
  },
937
  {
938
  "epoch": 1.04,
939
- "learning_rate": 3.5031999999999998e-06,
940
- "loss": 0.1687,
941
  "step": 3625
942
  },
943
  {
944
  "epoch": 1.04,
945
- "learning_rate": 3.4832e-06,
946
- "loss": 0.1216,
947
  "step": 3650
948
  },
949
  {
950
  "epoch": 1.05,
951
- "learning_rate": 3.4632000000000006e-06,
952
- "loss": 0.1197,
953
  "step": 3675
954
  },
955
  {
956
  "epoch": 1.05,
957
- "learning_rate": 3.4432e-06,
958
- "loss": 0.1202,
959
  "step": 3700
960
  },
961
  {
962
  "epoch": 1.05,
963
- "learning_rate": 3.4232e-06,
964
- "loss": 0.1162,
965
  "step": 3725
966
  },
967
  {
968
  "epoch": 1.06,
969
- "learning_rate": 3.4032000000000003e-06,
970
- "loss": 0.1157,
971
  "step": 3750
972
  },
973
  {
974
  "epoch": 1.06,
975
- "learning_rate": 3.3831999999999996e-06,
976
- "loss": 0.1333,
977
  "step": 3775
978
  },
979
  {
980
  "epoch": 1.06,
981
- "learning_rate": 3.3632000000000003e-06,
982
- "loss": 0.099,
983
  "step": 3800
984
  },
985
  {
986
  "epoch": 1.07,
987
- "learning_rate": 3.3432000000000004e-06,
988
- "loss": 0.1247,
989
  "step": 3825
990
  },
991
  {
992
  "epoch": 1.07,
993
- "learning_rate": 3.3232e-06,
994
- "loss": 0.112,
995
  "step": 3850
996
  },
997
  {
998
  "epoch": 1.07,
999
- "learning_rate": 3.3032e-06,
1000
- "loss": 0.1092,
1001
  "step": 3875
1002
  },
1003
  {
1004
  "epoch": 1.08,
1005
- "learning_rate": 3.2832e-06,
1006
- "loss": 0.1422,
1007
  "step": 3900
1008
  },
1009
  {
1010
  "epoch": 1.08,
1011
- "learning_rate": 3.2632000000000004e-06,
1012
- "loss": 0.1294,
1013
  "step": 3925
1014
  },
1015
  {
1016
  "epoch": 1.08,
1017
- "learning_rate": 3.2432e-06,
1018
- "loss": 0.1185,
1019
  "step": 3950
1020
  },
1021
  {
1022
  "epoch": 1.08,
1023
- "learning_rate": 3.2232000000000003e-06,
1024
- "loss": 0.1345,
1025
  "step": 3975
1026
  },
1027
  {
1028
  "epoch": 1.09,
1029
- "learning_rate": 3.2032000000000005e-06,
1030
- "loss": 0.0998,
1031
  "step": 4000
1032
  },
1033
  {
1034
  "epoch": 1.09,
1035
- "eval_loss": 0.25507599115371704,
1036
- "eval_runtime": 1832.0695,
1037
- "eval_samples_per_second": 3.598,
1038
- "eval_steps_per_second": 0.45,
1039
- "eval_wer": 16.821256429989067,
1040
  "step": 4000
1041
  },
1042
  {
1043
  "epoch": 1.09,
1044
- "learning_rate": 3.1832e-06,
1045
- "loss": 0.1089,
1046
  "step": 4025
1047
  },
1048
  {
1049
  "epoch": 1.09,
1050
- "learning_rate": 3.1632e-06,
1051
- "loss": 0.1012,
1052
  "step": 4050
1053
  },
1054
  {
1055
  "epoch": 1.1,
1056
- "learning_rate": 3.1432000000000002e-06,
1057
- "loss": 0.1139,
1058
  "step": 4075
1059
  },
1060
  {
1061
  "epoch": 1.1,
1062
- "learning_rate": 3.1232e-06,
1063
- "loss": 0.1186,
1064
  "step": 4100
1065
  },
1066
  {
1067
  "epoch": 1.1,
1068
- "learning_rate": 3.104e-06,
1069
- "loss": 0.1189,
1070
  "step": 4125
1071
  },
1072
  {
1073
  "epoch": 1.11,
1074
- "learning_rate": 3.084e-06,
1075
- "loss": 0.0963,
1076
  "step": 4150
1077
  },
1078
  {
1079
  "epoch": 1.11,
1080
- "learning_rate": 3.0640000000000002e-06,
1081
- "loss": 0.1048,
1082
  "step": 4175
1083
  },
1084
  {
1085
  "epoch": 1.11,
1086
- "learning_rate": 3.044e-06,
1087
- "loss": 0.1172,
1088
  "step": 4200
1089
  },
1090
  {
1091
  "epoch": 1.12,
1092
- "learning_rate": 3.024e-06,
1093
- "loss": 0.1075,
1094
  "step": 4225
1095
  },
1096
  {
1097
  "epoch": 1.12,
1098
- "learning_rate": 3.0040000000000004e-06,
1099
- "loss": 0.1362,
1100
  "step": 4250
1101
  },
1102
  {
1103
  "epoch": 1.12,
1104
- "learning_rate": 2.984e-06,
1105
- "loss": 0.1227,
1106
  "step": 4275
1107
  },
1108
  {
1109
  "epoch": 1.13,
1110
- "learning_rate": 2.964e-06,
1111
- "loss": 0.1109,
1112
  "step": 4300
1113
  },
1114
  {
1115
  "epoch": 1.13,
1116
- "learning_rate": 2.944e-06,
1117
- "loss": 0.1078,
1118
  "step": 4325
1119
  },
1120
  {
1121
  "epoch": 1.13,
1122
- "learning_rate": 2.9240000000000003e-06,
1123
- "loss": 0.1169,
1124
  "step": 4350
1125
  },
1126
  {
1127
  "epoch": 1.13,
1128
- "learning_rate": 2.904e-06,
1129
- "loss": 0.0874,
1130
  "step": 4375
1131
  },
1132
  {
1133
  "epoch": 1.14,
1134
- "learning_rate": 2.8840000000000003e-06,
1135
- "loss": 0.0942,
1136
  "step": 4400
1137
  },
1138
  {
1139
  "epoch": 1.14,
1140
- "learning_rate": 2.864e-06,
1141
- "loss": 0.0842,
1142
  "step": 4425
1143
  },
1144
  {
1145
  "epoch": 1.14,
1146
- "learning_rate": 2.844e-06,
1147
- "loss": 0.0776,
1148
  "step": 4450
1149
  },
1150
  {
1151
  "epoch": 1.15,
1152
- "learning_rate": 2.824e-06,
1153
- "loss": 0.0973,
1154
  "step": 4475
1155
  },
1156
  {
1157
  "epoch": 1.15,
1158
- "learning_rate": 2.804e-06,
1159
- "loss": 0.095,
1160
  "step": 4500
1161
  },
1162
  {
1163
  "epoch": 1.15,
1164
- "eval_loss": 0.2510645389556885,
1165
- "eval_runtime": 1808.3002,
1166
- "eval_samples_per_second": 3.645,
1167
- "eval_steps_per_second": 0.456,
1168
- "eval_wer": 16.389890234517395,
1169
  "step": 4500
1170
  },
1171
  {
1172
  "epoch": 1.15,
1173
- "learning_rate": 2.7840000000000004e-06,
1174
- "loss": 0.0869,
1175
  "step": 4525
1176
  },
1177
  {
1178
  "epoch": 1.16,
1179
- "learning_rate": 2.764e-06,
1180
- "loss": 0.0864,
1181
  "step": 4550
1182
  },
1183
  {
1184
  "epoch": 1.16,
1185
- "learning_rate": 2.744e-06,
1186
  "loss": 0.1199,
1187
  "step": 4575
1188
  },
1189
  {
1190
  "epoch": 1.16,
1191
- "learning_rate": 2.724e-06,
1192
- "loss": 0.0972,
1193
  "step": 4600
1194
  },
1195
  {
1196
  "epoch": 1.17,
1197
- "learning_rate": 2.704e-06,
1198
- "loss": 0.0819,
1199
  "step": 4625
1200
  },
1201
  {
1202
  "epoch": 1.17,
1203
- "learning_rate": 2.684e-06,
1204
- "loss": 0.0767,
1205
  "step": 4650
1206
  },
1207
  {
1208
  "epoch": 1.17,
1209
- "learning_rate": 2.6640000000000002e-06,
1210
- "loss": 0.088,
1211
  "step": 4675
1212
  },
1213
  {
1214
  "epoch": 1.18,
1215
- "learning_rate": 2.644e-06,
1216
- "loss": 0.1127,
1217
  "step": 4700
1218
  },
1219
  {
1220
  "epoch": 1.18,
1221
- "learning_rate": 2.624e-06,
1222
- "loss": 0.0818,
1223
  "step": 4725
1224
  },
1225
  {
1226
  "epoch": 1.18,
1227
- "learning_rate": 2.604e-06,
1228
- "loss": 0.086,
1229
  "step": 4750
1230
  },
1231
  {
1232
  "epoch": 1.18,
1233
- "learning_rate": 2.5839999999999997e-06,
1234
- "loss": 0.095,
1235
  "step": 4775
1236
  },
1237
  {
1238
  "epoch": 1.19,
1239
- "learning_rate": 2.564e-06,
1240
- "loss": 0.0833,
1241
  "step": 4800
1242
  },
1243
  {
1244
  "epoch": 1.19,
1245
- "learning_rate": 2.544e-06,
1246
- "loss": 0.0818,
1247
  "step": 4825
1248
  },
1249
  {
1250
  "epoch": 1.19,
1251
- "learning_rate": 2.5240000000000003e-06,
1252
- "loss": 0.0801,
1253
  "step": 4850
1254
  },
1255
  {
1256
  "epoch": 1.2,
1257
- "learning_rate": 2.504e-06,
1258
- "loss": 0.0969,
1259
  "step": 4875
1260
  },
1261
  {
1262
  "epoch": 1.2,
1263
- "learning_rate": 2.484e-06,
1264
- "loss": 0.0883,
1265
  "step": 4900
1266
  },
1267
  {
1268
  "epoch": 1.2,
1269
- "learning_rate": 2.464e-06,
1270
- "loss": 0.0877,
1271
  "step": 4925
1272
  },
1273
  {
1274
  "epoch": 1.21,
1275
- "learning_rate": 2.444e-06,
1276
- "loss": 0.0964,
1277
  "step": 4950
1278
  },
1279
  {
1280
  "epoch": 1.21,
1281
- "learning_rate": 2.4240000000000004e-06,
1282
- "loss": 0.107,
1283
  "step": 4975
1284
  },
1285
  {
1286
  "epoch": 1.21,
1287
- "learning_rate": 2.404e-06,
1288
- "loss": 0.0971,
1289
  "step": 5000
1290
  },
1291
  {
1292
  "epoch": 1.21,
1293
- "eval_loss": 0.2415408343076706,
1294
- "eval_runtime": 1804.4355,
1295
- "eval_samples_per_second": 3.653,
1296
- "eval_steps_per_second": 0.457,
1297
- "eval_wer": 15.539309004009883,
1298
  "step": 5000
1299
  },
1300
  {
1301
  "epoch": 1.22,
1302
- "learning_rate": 2.384e-06,
1303
- "loss": 0.1077,
1304
  "step": 5025
1305
  },
1306
  {
1307
  "epoch": 1.22,
1308
- "learning_rate": 2.364e-06,
1309
- "loss": 0.0712,
1310
  "step": 5050
1311
  },
1312
  {
1313
  "epoch": 1.22,
1314
- "learning_rate": 2.344e-06,
1315
- "loss": 0.0749,
1316
  "step": 5075
1317
  },
1318
  {
1319
  "epoch": 1.23,
1320
- "learning_rate": 2.324e-06,
1321
- "loss": 0.0851,
1322
  "step": 5100
1323
  },
1324
  {
1325
  "epoch": 1.23,
1326
- "learning_rate": 2.3040000000000003e-06,
1327
- "loss": 0.0851,
1328
  "step": 5125
1329
  },
1330
  {
1331
  "epoch": 1.23,
1332
- "learning_rate": 2.284e-06,
1333
- "loss": 0.0782,
1334
  "step": 5150
1335
  },
1336
  {
1337
  "epoch": 1.23,
1338
- "learning_rate": 2.2640000000000003e-06,
1339
- "loss": 0.0795,
1340
  "step": 5175
1341
  },
1342
  {
1343
  "epoch": 1.24,
1344
- "learning_rate": 2.244e-06,
1345
- "loss": 0.0611,
1346
  "step": 5200
1347
  },
1348
  {
1349
  "epoch": 1.24,
1350
- "learning_rate": 2.224e-06,
1351
- "loss": 0.0752,
1352
  "step": 5225
1353
  },
1354
  {
1355
  "epoch": 1.24,
1356
- "learning_rate": 2.204e-06,
1357
- "loss": 0.0787,
1358
  "step": 5250
1359
  },
1360
  {
1361
  "epoch": 1.25,
1362
- "learning_rate": 2.184e-06,
1363
- "loss": 0.0717,
1364
  "step": 5275
1365
  },
1366
  {
1367
  "epoch": 1.25,
1368
- "learning_rate": 2.1640000000000004e-06,
1369
- "loss": 0.0748,
1370
  "step": 5300
1371
  },
1372
  {
1373
  "epoch": 1.25,
1374
- "learning_rate": 2.144e-06,
1375
- "loss": 0.0586,
1376
  "step": 5325
1377
  },
1378
  {
1379
  "epoch": 1.26,
1380
- "learning_rate": 2.124e-06,
1381
- "loss": 0.0719,
1382
  "step": 5350
1383
  },
1384
  {
1385
  "epoch": 1.26,
1386
- "learning_rate": 2.104e-06,
1387
- "loss": 0.0868,
1388
  "step": 5375
1389
  },
1390
  {
1391
  "epoch": 1.26,
1392
- "learning_rate": 2.084e-06,
1393
- "loss": 0.0631,
1394
  "step": 5400
1395
  },
1396
  {
1397
  "epoch": 1.27,
1398
- "learning_rate": 2.064e-06,
1399
- "loss": 0.0767,
1400
  "step": 5425
1401
  },
1402
  {
1403
  "epoch": 1.27,
1404
- "learning_rate": 2.0440000000000003e-06,
1405
- "loss": 0.0834,
1406
  "step": 5450
1407
  },
1408
  {
1409
  "epoch": 1.27,
1410
- "learning_rate": 2.024e-06,
1411
- "loss": 0.0767,
1412
  "step": 5475
1413
  },
1414
  {
1415
  "epoch": 1.28,
1416
- "learning_rate": 2.004e-06,
1417
- "loss": 0.0964,
1418
  "step": 5500
1419
  },
1420
  {
1421
  "epoch": 1.28,
1422
- "eval_loss": 0.2336428314447403,
1423
- "eval_runtime": 1826.0414,
1424
- "eval_samples_per_second": 3.609,
1425
- "eval_steps_per_second": 0.451,
1426
- "eval_wer": 15.170723804123293,
1427
  "step": 5500
1428
  },
1429
  {
1430
  "epoch": 1.28,
1431
- "learning_rate": 1.984e-06,
1432
- "loss": 0.0813,
1433
  "step": 5525
1434
  },
1435
  {
1436
  "epoch": 1.28,
1437
- "learning_rate": 1.9639999999999997e-06,
1438
- "loss": 0.0517,
1439
  "step": 5550
1440
  },
1441
  {
1442
  "epoch": 1.28,
1443
- "learning_rate": 1.944e-06,
1444
- "loss": 0.079,
1445
  "step": 5575
1446
  },
1447
  {
1448
  "epoch": 1.29,
1449
- "learning_rate": 1.924e-06,
1450
- "loss": 0.0787,
1451
  "step": 5600
1452
  },
1453
  {
1454
  "epoch": 1.29,
1455
- "learning_rate": 1.9040000000000001e-06,
1456
- "loss": 0.0537,
1457
  "step": 5625
1458
  },
1459
  {
1460
  "epoch": 1.29,
1461
- "learning_rate": 1.884e-06,
1462
- "loss": 0.0696,
1463
  "step": 5650
1464
  },
1465
  {
1466
  "epoch": 1.3,
1467
- "learning_rate": 1.8639999999999999e-06,
1468
- "loss": 0.0737,
1469
  "step": 5675
1470
  },
1471
  {
1472
  "epoch": 1.3,
1473
- "learning_rate": 1.844e-06,
1474
- "loss": 0.0698,
1475
  "step": 5700
1476
  },
1477
  {
1478
  "epoch": 1.3,
1479
- "learning_rate": 1.824e-06,
1480
- "loss": 0.0831,
1481
  "step": 5725
1482
  },
1483
  {
1484
  "epoch": 1.31,
1485
- "learning_rate": 1.8040000000000002e-06,
1486
- "loss": 0.0919,
1487
  "step": 5750
1488
  },
1489
  {
1490
  "epoch": 1.31,
1491
- "learning_rate": 1.784e-06,
1492
- "loss": 0.0846,
1493
  "step": 5775
1494
  },
1495
  {
1496
  "epoch": 1.31,
1497
- "learning_rate": 1.764e-06,
1498
- "loss": 0.0653,
1499
  "step": 5800
1500
  },
1501
  {
1502
  "epoch": 1.32,
1503
- "learning_rate": 1.7440000000000002e-06,
1504
- "loss": 0.0676,
1505
  "step": 5825
1506
  },
1507
  {
1508
  "epoch": 1.32,
1509
- "learning_rate": 1.724e-06,
1510
- "loss": 0.0559,
1511
  "step": 5850
1512
  },
1513
  {
1514
  "epoch": 1.32,
1515
- "learning_rate": 1.704e-06,
1516
- "loss": 0.0659,
1517
  "step": 5875
1518
  },
1519
  {
1520
  "epoch": 1.33,
1521
- "learning_rate": 1.684e-06,
1522
- "loss": 0.0693,
1523
  "step": 5900
1524
  },
1525
  {
1526
  "epoch": 1.33,
1527
- "learning_rate": 1.6639999999999999e-06,
1528
- "loss": 0.0582,
1529
  "step": 5925
1530
  },
1531
  {
1532
  "epoch": 1.33,
1533
- "learning_rate": 1.6440000000000003e-06,
1534
- "loss": 0.1016,
1535
  "step": 5950
1536
  },
1537
  {
1538
  "epoch": 1.33,
1539
- "learning_rate": 1.624e-06,
1540
- "loss": 0.064,
1541
  "step": 5975
1542
  },
1543
  {
1544
  "epoch": 1.34,
1545
- "learning_rate": 1.604e-06,
1546
- "loss": 0.072,
1547
  "step": 6000
1548
  },
1549
  {
1550
  "epoch": 1.34,
1551
- "eval_loss": 0.23533816635608673,
1552
- "eval_runtime": 1816.0058,
1553
- "eval_samples_per_second": 3.629,
1554
- "eval_steps_per_second": 0.454,
1555
- "eval_wer": 14.75960954271133,
1556
  "step": 6000
1557
  },
1558
  {
1559
  "epoch": 1.34,
1560
- "learning_rate": 1.5840000000000002e-06,
1561
- "loss": 0.0494,
1562
  "step": 6025
1563
  },
1564
  {
1565
  "epoch": 1.34,
1566
- "learning_rate": 1.564e-06,
1567
- "loss": 0.0647,
1568
  "step": 6050
1569
  },
1570
  {
1571
  "epoch": 1.35,
1572
- "learning_rate": 1.5440000000000002e-06,
1573
- "loss": 0.073,
1574
  "step": 6075
1575
  },
1576
  {
1577
  "epoch": 1.35,
1578
- "learning_rate": 1.5240000000000001e-06,
1579
- "loss": 0.0758,
1580
  "step": 6100
1581
  },
1582
  {
1583
  "epoch": 1.35,
1584
- "learning_rate": 1.504e-06,
1585
- "loss": 0.0473,
1586
  "step": 6125
1587
  },
1588
  {
1589
  "epoch": 1.36,
1590
- "learning_rate": 1.484e-06,
1591
- "loss": 0.0645,
1592
  "step": 6150
1593
  },
1594
  {
1595
  "epoch": 1.36,
1596
- "learning_rate": 1.464e-06,
1597
- "loss": 0.0544,
1598
  "step": 6175
1599
  },
1600
  {
1601
  "epoch": 1.36,
1602
- "learning_rate": 1.444e-06,
1603
- "loss": 0.0674,
1604
  "step": 6200
1605
  },
1606
  {
1607
  "epoch": 1.37,
1608
- "learning_rate": 1.424e-06,
1609
- "loss": 0.0721,
1610
  "step": 6225
1611
  },
1612
  {
1613
  "epoch": 1.37,
1614
- "learning_rate": 1.404e-06,
1615
- "loss": 0.0668,
1616
  "step": 6250
1617
  },
1618
  {
1619
  "epoch": 1.37,
1620
- "learning_rate": 1.384e-06,
1621
- "loss": 0.0587,
1622
  "step": 6275
1623
  },
1624
  {
1625
  "epoch": 1.38,
1626
- "learning_rate": 1.364e-06,
1627
- "loss": 0.0494,
1628
  "step": 6300
1629
  },
1630
  {
1631
  "epoch": 1.38,
1632
- "learning_rate": 1.344e-06,
1633
- "loss": 0.0802,
1634
  "step": 6325
1635
  },
1636
  {
1637
  "epoch": 1.38,
1638
- "learning_rate": 1.3240000000000002e-06,
1639
- "loss": 0.0636,
1640
  "step": 6350
1641
  },
1642
  {
1643
  "epoch": 1.38,
1644
- "learning_rate": 1.304e-06,
1645
- "loss": 0.0499,
1646
  "step": 6375
1647
  },
1648
  {
1649
  "epoch": 1.39,
1650
- "learning_rate": 1.284e-06,
1651
- "loss": 0.0666,
1652
  "step": 6400
1653
  },
1654
  {
1655
  "epoch": 1.39,
1656
- "learning_rate": 1.264e-06,
1657
- "loss": 0.0508,
1658
  "step": 6425
1659
  },
1660
  {
1661
  "epoch": 1.39,
1662
- "learning_rate": 1.244e-06,
1663
- "loss": 0.0472,
1664
  "step": 6450
1665
  },
1666
  {
1667
  "epoch": 1.4,
1668
- "learning_rate": 1.224e-06,
1669
- "loss": 0.0643,
1670
  "step": 6475
1671
  },
1672
  {
1673
  "epoch": 1.4,
1674
- "learning_rate": 1.204e-06,
1675
- "loss": 0.0658,
1676
  "step": 6500
1677
  },
1678
  {
1679
  "epoch": 1.4,
1680
- "eval_loss": 0.23401623964309692,
1681
- "eval_runtime": 1798.9797,
1682
  "eval_samples_per_second": 3.664,
1683
  "eval_steps_per_second": 0.458,
1684
- "eval_wer": 14.676576613066548,
1685
  "step": 6500
1686
  },
1687
  {
1688
  "epoch": 1.4,
1689
- "learning_rate": 1.1848e-06,
1690
- "loss": 0.0671,
1691
  "step": 6525
1692
  },
1693
  {
1694
  "epoch": 1.41,
1695
- "learning_rate": 1.1648e-06,
1696
- "loss": 0.0707,
1697
  "step": 6550
1698
  },
1699
  {
1700
  "epoch": 1.41,
1701
- "learning_rate": 1.1448e-06,
1702
- "loss": 0.0723,
1703
  "step": 6575
1704
  },
1705
  {
1706
  "epoch": 2.0,
1707
- "learning_rate": 1.1248e-06,
1708
- "loss": 0.0625,
1709
  "step": 6600
1710
  },
1711
  {
1712
  "epoch": 2.0,
1713
- "learning_rate": 1.1048e-06,
1714
- "loss": 0.0843,
1715
  "step": 6625
1716
  },
1717
  {
1718
  "epoch": 2.01,
1719
- "learning_rate": 1.0848e-06,
1720
- "loss": 0.087,
1721
  "step": 6650
1722
  },
1723
  {
1724
  "epoch": 2.01,
1725
- "learning_rate": 1.0648e-06,
1726
- "loss": 0.0561,
1727
  "step": 6675
1728
  },
1729
  {
1730
  "epoch": 2.01,
1731
- "learning_rate": 1.0448e-06,
1732
- "loss": 0.063,
1733
  "step": 6700
1734
  },
1735
  {
1736
  "epoch": 2.02,
1737
- "learning_rate": 1.0248000000000001e-06,
1738
- "loss": 0.0596,
1739
  "step": 6725
1740
  },
1741
  {
1742
  "epoch": 2.02,
1743
- "learning_rate": 1.0048e-06,
1744
- "loss": 0.0506,
1745
  "step": 6750
1746
  },
1747
  {
1748
  "epoch": 2.02,
1749
- "learning_rate": 9.848e-07,
1750
- "loss": 0.0526,
1751
  "step": 6775
1752
  },
1753
  {
1754
  "epoch": 2.03,
1755
- "learning_rate": 9.648e-07,
1756
- "loss": 0.0479,
1757
  "step": 6800
1758
  },
1759
  {
1760
  "epoch": 2.03,
1761
- "learning_rate": 9.448e-07,
1762
- "loss": 0.0494,
1763
  "step": 6825
1764
  },
1765
  {
1766
  "epoch": 2.03,
1767
- "learning_rate": 9.248000000000001e-07,
1768
- "loss": 0.0564,
1769
  "step": 6850
1770
  },
1771
  {
1772
  "epoch": 2.04,
1773
- "learning_rate": 9.048e-07,
1774
- "loss": 0.0499,
1775
  "step": 6875
1776
  },
1777
  {
1778
  "epoch": 2.04,
1779
- "learning_rate": 8.848e-07,
1780
- "loss": 0.0593,
1781
  "step": 6900
1782
  },
1783
  {
1784
  "epoch": 2.04,
1785
- "learning_rate": 8.648000000000001e-07,
1786
- "loss": 0.049,
1787
  "step": 6925
1788
  },
1789
  {
1790
  "epoch": 2.04,
1791
- "learning_rate": 8.448e-07,
1792
- "loss": 0.0527,
1793
  "step": 6950
1794
  },
1795
  {
1796
  "epoch": 2.05,
1797
- "learning_rate": 8.247999999999999e-07,
1798
- "loss": 0.0283,
1799
  "step": 6975
1800
  },
1801
  {
1802
  "epoch": 2.05,
1803
- "learning_rate": 8.048e-07,
1804
- "loss": 0.033,
1805
  "step": 7000
1806
  },
1807
  {
1808
  "epoch": 2.05,
1809
- "eval_loss": 0.2349175214767456,
1810
- "eval_runtime": 1811.228,
1811
- "eval_samples_per_second": 3.639,
1812
- "eval_steps_per_second": 0.455,
1813
- "eval_wer": 14.376847988982949,
1814
  "step": 7000
1815
  },
1816
  {
1817
  "epoch": 2.05,
1818
- "learning_rate": 7.848e-07,
1819
- "loss": 0.033,
1820
  "step": 7025
1821
  },
1822
  {
1823
  "epoch": 2.06,
1824
- "learning_rate": 7.648000000000001e-07,
1825
- "loss": 0.0397,
1826
  "step": 7050
1827
  },
1828
  {
1829
  "epoch": 2.06,
1830
- "learning_rate": 7.448e-07,
1831
- "loss": 0.0271,
1832
  "step": 7075
1833
  },
1834
  {
1835
  "epoch": 2.06,
1836
- "learning_rate": 7.248e-07,
1837
- "loss": 0.0404,
1838
  "step": 7100
1839
  },
1840
  {
1841
  "epoch": 2.07,
1842
- "learning_rate": 7.048e-07,
1843
- "loss": 0.0348,
1844
  "step": 7125
1845
  },
1846
  {
1847
  "epoch": 2.07,
1848
- "learning_rate": 6.848e-07,
1849
- "loss": 0.0279,
1850
  "step": 7150
1851
  },
1852
  {
1853
  "epoch": 2.07,
1854
- "learning_rate": 6.648e-07,
1855
- "loss": 0.0296,
1856
  "step": 7175
1857
  },
1858
  {
1859
  "epoch": 2.08,
1860
- "learning_rate": 6.448000000000001e-07,
1861
- "loss": 0.0473,
1862
  "step": 7200
1863
  },
1864
  {
1865
  "epoch": 2.08,
1866
- "learning_rate": 6.247999999999999e-07,
1867
- "loss": 0.0287,
1868
  "step": 7225
1869
  },
1870
  {
1871
  "epoch": 2.08,
1872
- "learning_rate": 6.048e-07,
1873
- "loss": 0.0377,
1874
  "step": 7250
1875
  },
1876
  {
1877
  "epoch": 2.09,
1878
- "learning_rate": 5.848e-07,
1879
- "loss": 0.042,
1880
  "step": 7275
1881
  },
1882
  {
1883
  "epoch": 2.09,
1884
- "learning_rate": 5.648e-07,
1885
- "loss": 0.0375,
1886
  "step": 7300
1887
  },
1888
  {
1889
  "epoch": 2.09,
1890
- "learning_rate": 5.448000000000001e-07,
1891
- "loss": 0.0367,
1892
  "step": 7325
1893
  },
1894
  {
1895
  "epoch": 2.09,
1896
- "learning_rate": 5.248e-07,
1897
- "loss": 0.0398,
1898
  "step": 7350
1899
  },
1900
  {
1901
  "epoch": 2.1,
1902
- "learning_rate": 5.048e-07,
1903
- "loss": 0.0381,
1904
  "step": 7375
1905
  },
1906
  {
1907
  "epoch": 2.1,
1908
- "learning_rate": 4.848e-07,
1909
- "loss": 0.0266,
1910
  "step": 7400
1911
  },
1912
  {
1913
  "epoch": 2.1,
1914
- "learning_rate": 4.6480000000000003e-07,
1915
- "loss": 0.0321,
1916
  "step": 7425
1917
  },
1918
  {
1919
  "epoch": 2.11,
1920
- "learning_rate": 4.4479999999999996e-07,
1921
- "loss": 0.0351,
1922
  "step": 7450
1923
  },
1924
  {
1925
  "epoch": 2.11,
1926
- "learning_rate": 4.2480000000000005e-07,
1927
- "loss": 0.0359,
1928
  "step": 7475
1929
  },
1930
  {
1931
  "epoch": 2.11,
1932
- "learning_rate": 4.0479999999999997e-07,
1933
- "loss": 0.0288,
1934
  "step": 7500
1935
  },
1936
  {
1937
  "epoch": 2.11,
1938
- "eval_loss": 0.23708966374397278,
1939
- "eval_runtime": 1812.1644,
1940
- "eval_samples_per_second": 3.637,
1941
- "eval_steps_per_second": 0.455,
1942
- "eval_wer": 14.186479808821742,
1943
  "step": 7500
1944
  },
1945
  {
1946
  "epoch": 2.12,
1947
- "learning_rate": 3.848e-07,
1948
- "loss": 0.0363,
1949
  "step": 7525
1950
  },
1951
  {
1952
  "epoch": 2.12,
1953
- "learning_rate": 3.648e-07,
1954
- "loss": 0.0351,
1955
  "step": 7550
1956
  },
1957
  {
1958
  "epoch": 2.12,
1959
- "learning_rate": 3.448e-07,
1960
- "loss": 0.029,
1961
  "step": 7575
1962
  },
1963
  {
1964
  "epoch": 2.13,
1965
- "learning_rate": 3.248e-07,
1966
- "loss": 0.0381,
1967
  "step": 7600
1968
  },
1969
  {
1970
  "epoch": 2.13,
1971
- "learning_rate": 3.048e-07,
1972
- "loss": 0.0299,
1973
  "step": 7625
1974
  },
1975
  {
1976
  "epoch": 2.13,
1977
- "learning_rate": 2.848e-07,
1978
- "loss": 0.0334,
1979
  "step": 7650
1980
  },
1981
  {
1982
  "epoch": 2.14,
1983
- "learning_rate": 2.648e-07,
1984
- "loss": 0.0273,
1985
  "step": 7675
1986
  },
1987
  {
1988
  "epoch": 2.14,
1989
- "learning_rate": 2.448e-07,
1990
- "loss": 0.0357,
1991
  "step": 7700
1992
  },
1993
  {
1994
  "epoch": 2.14,
1995
- "learning_rate": 2.2480000000000003e-07,
1996
- "loss": 0.0386,
1997
  "step": 7725
1998
  },
1999
  {
2000
  "epoch": 2.14,
2001
- "learning_rate": 2.048e-07,
2002
- "loss": 0.0307,
2003
  "step": 7750
2004
  },
2005
  {
2006
  "epoch": 2.15,
2007
- "learning_rate": 1.8480000000000001e-07,
2008
- "loss": 0.0319,
2009
  "step": 7775
2010
  },
2011
  {
2012
  "epoch": 2.15,
2013
- "learning_rate": 1.648e-07,
2014
- "loss": 0.0197,
2015
  "step": 7800
2016
  },
2017
  {
2018
  "epoch": 2.15,
2019
- "learning_rate": 1.448e-07,
2020
- "loss": 0.0328,
2021
  "step": 7825
2022
  },
2023
  {
2024
  "epoch": 2.16,
2025
- "learning_rate": 1.248e-07,
2026
- "loss": 0.0317,
2027
  "step": 7850
2028
  },
2029
  {
2030
  "epoch": 2.16,
2031
- "learning_rate": 1.048e-07,
2032
- "loss": 0.0264,
2033
  "step": 7875
2034
  },
2035
  {
2036
  "epoch": 2.16,
2037
- "learning_rate": 8.48e-08,
2038
- "loss": 0.029,
2039
  "step": 7900
2040
  },
2041
  {
2042
  "epoch": 2.17,
2043
- "learning_rate": 6.480000000000001e-08,
2044
- "loss": 0.0224,
2045
  "step": 7925
2046
  },
2047
  {
2048
  "epoch": 2.17,
2049
- "learning_rate": 4.48e-08,
2050
- "loss": 0.0297,
2051
  "step": 7950
2052
  },
2053
  {
2054
  "epoch": 2.17,
2055
- "learning_rate": 2.48e-08,
2056
- "loss": 0.0337,
2057
  "step": 7975
2058
  },
2059
  {
2060
  "epoch": 2.18,
2061
- "learning_rate": 4.8e-09,
2062
- "loss": 0.0352,
2063
  "step": 8000
2064
  },
2065
  {
2066
  "epoch": 2.18,
2067
- "eval_loss": 0.23755376040935516,
2068
- "eval_runtime": 1811.3928,
2069
- "eval_samples_per_second": 3.639,
2070
- "eval_steps_per_second": 0.455,
2071
- "eval_wer": 14.119648426424725,
2072
  "step": 8000
2073
  },
2074
  {
2075
  "epoch": 2.18,
2076
  "step": 8000,
2077
  "total_flos": 3.265527462100992e+19,
2078
- "train_loss": 0.191678307980299,
2079
- "train_runtime": 39548.417,
2080
- "train_samples_per_second": 0.809,
2081
- "train_steps_per_second": 0.202
2082
  }
2083
  ],
2084
  "max_steps": 8000,
 
1
  {
2
+ "best_metric": 12.839726193851513,
3
+ "best_model_checkpoint": "./checkpoint-7500",
4
  "epoch": 2.176,
5
  "global_step": 8000,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.0,
12
+ "learning_rate": 4.6000000000000004e-07,
13
+ "loss": 1.4182,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
+ "learning_rate": 9.400000000000001e-07,
19
+ "loss": 1.292,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
+ "learning_rate": 1.44e-06,
25
+ "loss": 1.0018,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.01,
30
+ "learning_rate": 1.94e-06,
31
+ "loss": 0.7765,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.02,
36
+ "learning_rate": 2.4400000000000004e-06,
37
+ "loss": 0.7103,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.02,
42
+ "learning_rate": 2.9400000000000002e-06,
43
+ "loss": 0.6597,
44
  "step": 150
45
  },
46
  {
47
  "epoch": 0.02,
48
+ "learning_rate": 3.44e-06,
49
+ "loss": 0.6657,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.03,
54
+ "learning_rate": 3.94e-06,
55
+ "loss": 0.5853,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.03,
60
+ "learning_rate": 4.440000000000001e-06,
61
+ "loss": 0.5273,
62
  "step": 225
63
  },
64
  {
65
  "epoch": 0.03,
66
+ "learning_rate": 4.94e-06,
67
+ "loss": 0.5979,
68
  "step": 250
69
  },
70
  {
71
  "epoch": 0.03,
72
+ "learning_rate": 5.4400000000000004e-06,
73
+ "loss": 0.5861,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.04,
78
+ "learning_rate": 5.94e-06,
79
+ "loss": 0.5085,
80
  "step": 300
81
  },
82
  {
83
  "epoch": 0.04,
84
+ "learning_rate": 6.440000000000001e-06,
85
+ "loss": 0.4827,
86
  "step": 325
87
  },
88
  {
89
  "epoch": 0.04,
90
+ "learning_rate": 6.9400000000000005e-06,
91
+ "loss": 0.4909,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.05,
96
+ "learning_rate": 7.440000000000001e-06,
97
+ "loss": 0.4651,
98
  "step": 375
99
  },
100
  {
101
  "epoch": 0.05,
102
+ "learning_rate": 7.94e-06,
103
+ "loss": 0.494,
104
  "step": 400
105
  },
106
  {
107
  "epoch": 0.05,
108
+ "learning_rate": 8.44e-06,
109
+ "loss": 0.4188,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.06,
114
+ "learning_rate": 8.94e-06,
115
+ "loss": 0.3849,
116
  "step": 450
117
  },
118
  {
119
  "epoch": 0.06,
120
+ "learning_rate": 9.440000000000001e-06,
121
+ "loss": 0.4577,
122
  "step": 475
123
  },
124
  {
125
  "epoch": 0.06,
126
+ "learning_rate": 9.940000000000001e-06,
127
+ "loss": 0.4415,
128
  "step": 500
129
  },
130
  {
131
  "epoch": 0.06,
132
+ "eval_loss": 0.5091741681098938,
133
+ "eval_runtime": 1795.8417,
134
+ "eval_samples_per_second": 3.67,
135
+ "eval_steps_per_second": 0.459,
136
+ "eval_wer": 36.96990562598728,
137
  "step": 500
138
  },
139
  {
140
  "epoch": 0.07,
141
+ "learning_rate": 9.970666666666668e-06,
142
+ "loss": 0.4614,
143
  "step": 525
144
  },
145
  {
146
  "epoch": 0.07,
147
+ "learning_rate": 9.937333333333334e-06,
148
+ "loss": 0.4284,
149
  "step": 550
150
  },
151
  {
152
  "epoch": 0.07,
153
+ "learning_rate": 9.904e-06,
154
+ "loss": 0.4486,
155
  "step": 575
156
  },
157
  {
158
  "epoch": 0.07,
159
+ "learning_rate": 9.870666666666667e-06,
160
+ "loss": 0.4431,
161
  "step": 600
162
  },
163
  {
164
  "epoch": 0.08,
165
+ "learning_rate": 9.837333333333335e-06,
166
+ "loss": 0.4245,
167
  "step": 625
168
  },
169
  {
170
  "epoch": 0.08,
171
+ "learning_rate": 9.804000000000001e-06,
172
+ "loss": 0.4264,
173
  "step": 650
174
  },
175
  {
176
  "epoch": 0.08,
177
+ "learning_rate": 9.770666666666668e-06,
178
+ "loss": 0.4147,
179
  "step": 675
180
  },
181
  {
182
  "epoch": 0.09,
183
+ "learning_rate": 9.737333333333334e-06,
184
+ "loss": 0.4116,
185
  "step": 700
186
  },
187
  {
188
  "epoch": 0.09,
189
+ "learning_rate": 9.704e-06,
190
+ "loss": 0.3665,
191
  "step": 725
192
  },
193
  {
194
  "epoch": 0.09,
195
+ "learning_rate": 9.670666666666667e-06,
196
+ "loss": 0.4009,
197
  "step": 750
198
  },
199
  {
200
  "epoch": 0.1,
201
+ "learning_rate": 9.637333333333333e-06,
202
+ "loss": 0.3902,
203
  "step": 775
204
  },
205
  {
206
  "epoch": 0.1,
207
+ "learning_rate": 9.604000000000002e-06,
208
+ "loss": 0.3657,
209
  "step": 800
210
  },
211
  {
212
  "epoch": 0.1,
213
+ "learning_rate": 9.570666666666666e-06,
214
+ "loss": 0.3781,
215
  "step": 825
216
  },
217
  {
218
  "epoch": 0.11,
219
+ "learning_rate": 9.537333333333334e-06,
220
+ "loss": 0.4346,
221
  "step": 850
222
  },
223
  {
224
  "epoch": 0.11,
225
+ "learning_rate": 9.504e-06,
226
+ "loss": 0.4036,
227
  "step": 875
228
  },
229
  {
230
  "epoch": 0.11,
231
+ "learning_rate": 9.470666666666667e-06,
232
+ "loss": 0.3776,
233
  "step": 900
234
  },
235
  {
236
  "epoch": 0.12,
237
+ "learning_rate": 9.437333333333334e-06,
238
+ "loss": 0.4082,
239
  "step": 925
240
  },
241
  {
242
  "epoch": 0.12,
243
+ "learning_rate": 9.404e-06,
244
+ "loss": 0.3838,
245
  "step": 950
246
  },
247
  {
248
  "epoch": 0.12,
249
+ "learning_rate": 9.370666666666668e-06,
250
+ "loss": 0.4215,
251
  "step": 975
252
  },
253
  {
254
  "epoch": 0.12,
255
+ "learning_rate": 9.337333333333335e-06,
256
+ "loss": 0.4206,
257
  "step": 1000
258
  },
259
  {
260
  "epoch": 0.12,
261
+ "eval_loss": 0.41442760825157166,
262
+ "eval_runtime": 1800.1281,
263
+ "eval_samples_per_second": 3.661,
264
+ "eval_steps_per_second": 0.458,
265
+ "eval_wer": 28.33650613633602,
266
  "step": 1000
267
  },
268
  {
269
  "epoch": 0.13,
270
+ "learning_rate": 9.304000000000001e-06,
271
+ "loss": 0.4108,
272
  "step": 1025
273
  },
274
  {
275
  "epoch": 0.13,
276
+ "learning_rate": 9.270666666666667e-06,
277
+ "loss": 0.3692,
278
  "step": 1050
279
  },
280
  {
281
  "epoch": 0.13,
282
+ "learning_rate": 9.237333333333334e-06,
283
+ "loss": 0.3605,
284
  "step": 1075
285
  },
286
  {
287
  "epoch": 0.14,
288
+ "learning_rate": 9.204e-06,
289
+ "loss": 0.3929,
290
  "step": 1100
291
  },
292
  {
293
  "epoch": 0.14,
294
+ "learning_rate": 9.170666666666668e-06,
295
+ "loss": 0.3869,
296
  "step": 1125
297
  },
298
  {
299
  "epoch": 0.14,
300
+ "learning_rate": 9.137333333333333e-06,
301
+ "loss": 0.3675,
302
  "step": 1150
303
  },
304
  {
305
  "epoch": 0.15,
306
+ "learning_rate": 9.104000000000001e-06,
307
+ "loss": 0.3237,
308
  "step": 1175
309
  },
310
  {
311
  "epoch": 0.15,
312
+ "learning_rate": 9.070666666666668e-06,
313
+ "loss": 0.3177,
314
  "step": 1200
315
  },
316
  {
317
  "epoch": 0.15,
318
+ "learning_rate": 9.037333333333334e-06,
319
+ "loss": 0.2814,
320
  "step": 1225
321
  },
322
  {
323
  "epoch": 0.16,
324
+ "learning_rate": 9.004e-06,
325
+ "loss": 0.3247,
326
  "step": 1250
327
  },
328
  {
329
  "epoch": 0.16,
330
+ "learning_rate": 8.970666666666667e-06,
331
+ "loss": 0.3484,
332
  "step": 1275
333
  },
334
  {
335
  "epoch": 0.16,
336
+ "learning_rate": 8.937333333333335e-06,
337
+ "loss": 0.3439,
338
  "step": 1300
339
  },
340
  {
341
  "epoch": 0.17,
342
+ "learning_rate": 8.904e-06,
343
+ "loss": 0.3278,
344
  "step": 1325
345
  },
346
  {
347
  "epoch": 0.17,
348
+ "learning_rate": 8.870666666666668e-06,
349
+ "loss": 0.2681,
350
  "step": 1350
351
  },
352
  {
353
  "epoch": 0.17,
354
+ "learning_rate": 8.837333333333334e-06,
355
+ "loss": 0.348,
356
  "step": 1375
357
  },
358
  {
359
  "epoch": 0.17,
360
+ "learning_rate": 8.804e-06,
361
+ "loss": 0.2799,
362
  "step": 1400
363
  },
364
  {
365
  "epoch": 0.18,
366
+ "learning_rate": 8.770666666666667e-06,
367
+ "loss": 0.2981,
368
  "step": 1425
369
  },
370
  {
371
  "epoch": 0.18,
372
+ "learning_rate": 8.737333333333334e-06,
373
+ "loss": 0.3931,
374
  "step": 1450
375
  },
376
  {
377
  "epoch": 0.18,
378
+ "learning_rate": 8.704e-06,
379
  "loss": 0.3253,
380
  "step": 1475
381
  },
382
  {
383
  "epoch": 0.19,
384
+ "learning_rate": 8.670666666666666e-06,
385
+ "loss": 0.272,
386
  "step": 1500
387
  },
388
  {
389
  "epoch": 0.19,
390
+ "eval_loss": 0.355411559343338,
391
+ "eval_runtime": 1814.9108,
392
+ "eval_samples_per_second": 3.632,
393
+ "eval_steps_per_second": 0.454,
394
+ "eval_wer": 24.74381303414476,
395
  "step": 1500
396
  },
397
  {
398
  "epoch": 0.19,
399
+ "learning_rate": 8.637333333333335e-06,
400
+ "loss": 0.2907,
401
  "step": 1525
402
  },
403
  {
404
  "epoch": 0.19,
405
+ "learning_rate": 8.604000000000001e-06,
406
+ "loss": 0.2933,
407
  "step": 1550
408
  },
409
  {
410
  "epoch": 0.2,
411
+ "learning_rate": 8.570666666666667e-06,
412
+ "loss": 0.3691,
413
  "step": 1575
414
  },
415
  {
416
  "epoch": 0.2,
417
+ "learning_rate": 8.537333333333334e-06,
418
+ "loss": 0.3436,
419
  "step": 1600
420
  },
421
  {
422
  "epoch": 0.2,
423
+ "learning_rate": 8.504000000000002e-06,
424
+ "loss": 0.3675,
425
  "step": 1625
426
  },
427
  {
428
  "epoch": 0.21,
429
+ "learning_rate": 8.470666666666667e-06,
430
+ "loss": 0.3293,
431
  "step": 1650
432
  },
433
  {
434
  "epoch": 0.21,
435
+ "learning_rate": 8.437333333333335e-06,
436
+ "loss": 0.3261,
437
  "step": 1675
438
  },
439
  {
440
  "epoch": 0.21,
441
+ "learning_rate": 8.404000000000001e-06,
442
+ "loss": 0.2926,
443
  "step": 1700
444
  },
445
  {
446
  "epoch": 0.22,
447
+ "learning_rate": 8.370666666666668e-06,
448
+ "loss": 0.3077,
449
  "step": 1725
450
  },
451
  {
452
  "epoch": 0.22,
453
+ "learning_rate": 8.337333333333334e-06,
454
+ "loss": 0.3402,
455
  "step": 1750
456
  },
457
  {
458
  "epoch": 0.22,
459
+ "learning_rate": 8.304e-06,
460
+ "loss": 0.3268,
461
  "step": 1775
462
  },
463
  {
464
  "epoch": 0.23,
465
+ "learning_rate": 8.270666666666667e-06,
466
+ "loss": 0.3374,
467
  "step": 1800
468
  },
469
  {
470
  "epoch": 0.23,
471
+ "learning_rate": 8.237333333333333e-06,
472
  "loss": 0.2706,
473
  "step": 1825
474
  },
475
  {
476
  "epoch": 0.23,
477
+ "learning_rate": 8.204000000000001e-06,
478
+ "loss": 0.3183,
479
  "step": 1850
480
  },
481
  {
482
  "epoch": 0.23,
483
+ "learning_rate": 8.170666666666668e-06,
484
+ "loss": 0.2651,
485
  "step": 1875
486
  },
487
  {
488
  "epoch": 0.24,
489
+ "learning_rate": 8.137333333333334e-06,
490
+ "loss": 0.2943,
491
  "step": 1900
492
  },
493
  {
494
  "epoch": 0.24,
495
+ "learning_rate": 8.104e-06,
496
+ "loss": 0.2566,
497
  "step": 1925
498
  },
499
  {
500
  "epoch": 0.24,
501
+ "learning_rate": 8.070666666666667e-06,
502
+ "loss": 0.2191,
503
  "step": 1950
504
  },
505
  {
506
  "epoch": 0.25,
507
+ "learning_rate": 8.037333333333334e-06,
508
+ "loss": 0.2475,
509
  "step": 1975
510
  },
511
  {
512
  "epoch": 0.25,
513
+ "learning_rate": 8.004e-06,
514
+ "loss": 0.2681,
515
  "step": 2000
516
  },
517
  {
518
  "epoch": 0.25,
519
+ "eval_loss": 0.32705560326576233,
520
+ "eval_runtime": 1813.7957,
521
+ "eval_samples_per_second": 3.634,
522
+ "eval_steps_per_second": 0.454,
523
+ "eval_wer": 22.141439507472963,
524
  "step": 2000
525
  },
526
  {
527
  "epoch": 0.25,
528
+ "learning_rate": 7.970666666666668e-06,
529
+ "loss": 0.2198,
530
  "step": 2025
531
  },
532
  {
533
  "epoch": 0.26,
534
+ "learning_rate": 7.937333333333333e-06,
535
+ "loss": 0.2488,
536
  "step": 2050
537
  },
538
  {
539
  "epoch": 0.26,
540
+ "learning_rate": 7.904000000000001e-06,
541
+ "loss": 0.255,
542
  "step": 2075
543
  },
544
  {
545
  "epoch": 0.26,
546
+ "learning_rate": 7.872e-06,
547
+ "loss": 0.3194,
548
  "step": 2100
549
  },
550
  {
551
  "epoch": 0.27,
552
+ "learning_rate": 7.838666666666668e-06,
553
+ "loss": 0.267,
554
  "step": 2125
555
  },
556
  {
557
  "epoch": 0.27,
558
+ "learning_rate": 7.805333333333333e-06,
559
+ "loss": 0.2606,
560
  "step": 2150
561
  },
562
  {
563
  "epoch": 0.27,
564
+ "learning_rate": 7.772000000000001e-06,
565
+ "loss": 0.298,
566
  "step": 2175
567
  },
568
  {
569
  "epoch": 0.28,
570
+ "learning_rate": 7.738666666666668e-06,
571
+ "loss": 0.2627,
572
  "step": 2200
573
  },
574
  {
575
  "epoch": 0.28,
576
+ "learning_rate": 7.705333333333334e-06,
577
+ "loss": 0.2669,
578
  "step": 2225
579
  },
580
  {
581
  "epoch": 0.28,
582
+ "learning_rate": 7.672e-06,
583
+ "loss": 0.2211,
584
  "step": 2250
585
  },
586
  {
587
  "epoch": 0.28,
588
+ "learning_rate": 7.638666666666667e-06,
589
+ "loss": 0.2841,
590
  "step": 2275
591
  },
592
  {
593
  "epoch": 0.29,
594
+ "learning_rate": 7.605333333333333e-06,
595
+ "loss": 0.2735,
596
  "step": 2300
597
  },
598
  {
599
  "epoch": 0.29,
600
+ "learning_rate": 7.5720000000000005e-06,
601
+ "loss": 0.2536,
602
  "step": 2325
603
  },
604
  {
605
  "epoch": 0.29,
606
+ "learning_rate": 7.538666666666668e-06,
607
+ "loss": 0.2091,
608
  "step": 2350
609
  },
610
  {
611
  "epoch": 0.3,
612
+ "learning_rate": 7.505333333333334e-06,
613
+ "loss": 0.2331,
614
  "step": 2375
615
  },
616
  {
617
  "epoch": 0.3,
618
+ "learning_rate": 7.472000000000001e-06,
619
+ "loss": 0.2565,
620
  "step": 2400
621
  },
622
  {
623
  "epoch": 0.3,
624
+ "learning_rate": 7.438666666666667e-06,
625
+ "loss": 0.2227,
626
  "step": 2425
627
  },
628
  {
629
  "epoch": 0.31,
630
+ "learning_rate": 7.405333333333334e-06,
631
+ "loss": 0.2651,
632
  "step": 2450
633
  },
634
  {
635
  "epoch": 0.31,
636
+ "learning_rate": 7.372e-06,
637
+ "loss": 0.2292,
638
  "step": 2475
639
  },
640
  {
641
  "epoch": 0.31,
642
+ "learning_rate": 7.338666666666667e-06,
643
+ "loss": 0.2099,
644
  "step": 2500
645
  },
646
  {
647
  "epoch": 0.31,
648
+ "eval_loss": 0.2973436415195465,
649
+ "eval_runtime": 1831.6606,
650
+ "eval_samples_per_second": 3.598,
651
+ "eval_steps_per_second": 0.45,
652
+ "eval_wer": 19.53501559398923,
653
  "step": 2500
654
  },
655
  {
656
  "epoch": 0.32,
657
+ "learning_rate": 7.3053333333333344e-06,
658
+ "loss": 0.2293,
659
  "step": 2525
660
  },
661
  {
662
  "epoch": 0.32,
663
+ "learning_rate": 7.272e-06,
664
+ "loss": 0.2747,
665
  "step": 2550
666
  },
667
  {
668
  "epoch": 0.32,
669
+ "learning_rate": 7.238666666666667e-06,
670
+ "loss": 0.2507,
671
  "step": 2575
672
  },
673
  {
674
  "epoch": 0.33,
675
+ "learning_rate": 7.2053333333333345e-06,
676
+ "loss": 0.2871,
677
  "step": 2600
678
  },
679
  {
680
  "epoch": 0.33,
681
+ "learning_rate": 7.172e-06,
682
+ "loss": 0.2031,
683
  "step": 2625
684
  },
685
  {
686
  "epoch": 0.33,
687
+ "learning_rate": 7.138666666666667e-06,
688
+ "loss": 0.2481,
689
  "step": 2650
690
  },
691
  {
692
  "epoch": 0.33,
693
+ "learning_rate": 7.105333333333334e-06,
694
+ "loss": 0.2173,
695
  "step": 2675
696
  },
697
  {
698
  "epoch": 0.34,
699
+ "learning_rate": 7.072000000000001e-06,
700
+ "loss": 0.2288,
701
  "step": 2700
702
  },
703
  {
704
  "epoch": 0.34,
705
+ "learning_rate": 7.038666666666667e-06,
706
+ "loss": 0.2227,
707
  "step": 2725
708
  },
709
  {
710
  "epoch": 0.34,
711
+ "learning_rate": 7.005333333333334e-06,
712
+ "loss": 0.2666,
713
  "step": 2750
714
  },
715
  {
716
  "epoch": 0.35,
717
+ "learning_rate": 6.972000000000001e-06,
718
+ "loss": 0.2017,
719
  "step": 2775
720
  },
721
  {
722
  "epoch": 0.35,
723
+ "learning_rate": 6.938666666666667e-06,
724
+ "loss": 0.2285,
725
  "step": 2800
726
  },
727
  {
728
  "epoch": 0.35,
729
+ "learning_rate": 6.905333333333334e-06,
730
+ "loss": 0.2226,
731
  "step": 2825
732
  },
733
  {
734
  "epoch": 0.36,
735
+ "learning_rate": 6.872000000000001e-06,
736
  "loss": 0.2294,
737
  "step": 2850
738
  },
739
  {
740
  "epoch": 0.36,
741
+ "learning_rate": 6.838666666666667e-06,
742
+ "loss": 0.2147,
743
  "step": 2875
744
  },
745
  {
746
  "epoch": 0.36,
747
+ "learning_rate": 6.805333333333334e-06,
748
+ "loss": 0.2145,
749
  "step": 2900
750
  },
751
  {
752
  "epoch": 0.37,
753
+ "learning_rate": 6.7720000000000006e-06,
754
+ "loss": 0.2163,
755
  "step": 2925
756
  },
757
  {
758
  "epoch": 0.37,
759
+ "learning_rate": 6.738666666666667e-06,
760
+ "loss": 0.2418,
761
  "step": 2950
762
  },
763
  {
764
  "epoch": 0.37,
765
+ "learning_rate": 6.705333333333333e-06,
766
+ "loss": 0.2254,
767
  "step": 2975
768
  },
769
  {
770
  "epoch": 0.38,
771
+ "learning_rate": 6.672000000000001e-06,
772
+ "loss": 0.2283,
773
  "step": 3000
774
  },
775
  {
776
  "epoch": 0.38,
777
+ "eval_loss": 0.2760361433029175,
778
+ "eval_runtime": 1825.768,
779
+ "eval_samples_per_second": 3.61,
780
+ "eval_steps_per_second": 0.451,
781
+ "eval_wer": 18.50419215035036,
782
  "step": 3000
783
  },
784
  {
785
  "epoch": 0.38,
786
+ "learning_rate": 6.638666666666668e-06,
787
+ "loss": 0.2079,
788
  "step": 3025
789
  },
790
  {
791
  "epoch": 0.38,
792
+ "learning_rate": 6.6053333333333335e-06,
793
+ "loss": 0.2072,
794
  "step": 3050
795
  },
796
  {
797
  "epoch": 0.38,
798
+ "learning_rate": 6.572000000000001e-06,
799
+ "loss": 0.1982,
800
  "step": 3075
801
  },
802
  {
803
  "epoch": 0.39,
804
+ "learning_rate": 6.538666666666667e-06,
805
+ "loss": 0.203,
806
  "step": 3100
807
  },
808
  {
809
  "epoch": 0.39,
810
+ "learning_rate": 6.505333333333334e-06,
811
+ "loss": 0.2086,
812
  "step": 3125
813
  },
814
  {
815
  "epoch": 0.39,
816
+ "learning_rate": 6.472000000000001e-06,
817
+ "loss": 0.2027,
818
  "step": 3150
819
  },
820
  {
821
  "epoch": 0.4,
822
+ "learning_rate": 6.438666666666667e-06,
823
+ "loss": 0.2472,
824
  "step": 3175
825
  },
826
  {
827
  "epoch": 0.4,
828
+ "learning_rate": 6.405333333333334e-06,
829
+ "loss": 0.2051,
830
  "step": 3200
831
  },
832
  {
833
  "epoch": 0.4,
834
+ "learning_rate": 6.372e-06,
835
+ "loss": 0.2442,
836
  "step": 3225
837
  },
838
  {
839
  "epoch": 0.41,
840
+ "learning_rate": 6.338666666666667e-06,
841
+ "loss": 0.2736,
842
  "step": 3250
843
  },
844
  {
845
  "epoch": 0.41,
846
+ "learning_rate": 6.305333333333333e-06,
847
+ "loss": 0.2056,
848
  "step": 3275
849
  },
850
  {
851
  "epoch": 1.0,
852
+ "learning_rate": 6.272e-06,
853
+ "loss": 0.2335,
854
  "step": 3300
855
  },
856
  {
857
  "epoch": 1.0,
858
+ "learning_rate": 6.2386666666666675e-06,
859
+ "loss": 0.2336,
860
  "step": 3325
861
  },
862
  {
863
  "epoch": 1.01,
864
+ "learning_rate": 6.205333333333334e-06,
865
+ "loss": 0.2246,
866
  "step": 3350
867
  },
868
  {
869
  "epoch": 1.01,
870
+ "learning_rate": 6.172e-06,
871
+ "loss": 0.2139,
872
  "step": 3375
873
  },
874
  {
875
  "epoch": 1.01,
876
+ "learning_rate": 6.138666666666668e-06,
877
+ "loss": 0.1926,
878
  "step": 3400
879
  },
880
  {
881
  "epoch": 1.02,
882
+ "learning_rate": 6.105333333333334e-06,
883
+ "loss": 0.1883,
884
  "step": 3425
885
  },
886
  {
887
  "epoch": 1.02,
888
+ "learning_rate": 6.0720000000000005e-06,
889
+ "loss": 0.1779,
890
  "step": 3450
891
  },
892
  {
893
  "epoch": 1.02,
894
+ "learning_rate": 6.038666666666667e-06,
895
+ "loss": 0.1774,
896
  "step": 3475
897
  },
898
  {
899
  "epoch": 1.03,
900
+ "learning_rate": 6.005333333333334e-06,
901
+ "loss": 0.1477,
902
  "step": 3500
903
  },
904
  {
905
  "epoch": 1.03,
906
+ "eval_loss": 0.263724148273468,
907
+ "eval_runtime": 1810.3464,
908
+ "eval_samples_per_second": 3.641,
909
+ "eval_steps_per_second": 0.455,
910
+ "eval_wer": 17.149337761756247,
911
  "step": 3500
912
  },
913
  {
914
  "epoch": 1.03,
915
+ "learning_rate": 5.972e-06,
916
+ "loss": 0.1764,
917
  "step": 3525
918
  },
919
  {
920
  "epoch": 1.03,
921
+ "learning_rate": 5.938666666666667e-06,
922
+ "loss": 0.1823,
923
  "step": 3550
924
  },
925
  {
926
  "epoch": 1.03,
927
+ "learning_rate": 5.905333333333334e-06,
928
+ "loss": 0.1493,
929
  "step": 3575
930
  },
931
  {
932
  "epoch": 1.04,
933
+ "learning_rate": 5.872000000000001e-06,
934
+ "loss": 0.1619,
935
  "step": 3600
936
  },
937
  {
938
  "epoch": 1.04,
939
+ "learning_rate": 5.838666666666667e-06,
940
+ "loss": 0.1614,
941
  "step": 3625
942
  },
943
  {
944
  "epoch": 1.04,
945
+ "learning_rate": 5.8053333333333335e-06,
946
+ "loss": 0.1225,
947
  "step": 3650
948
  },
949
  {
950
  "epoch": 1.05,
951
+ "learning_rate": 5.772000000000001e-06,
952
+ "loss": 0.1155,
953
  "step": 3675
954
  },
955
  {
956
  "epoch": 1.05,
957
+ "learning_rate": 5.738666666666667e-06,
958
+ "loss": 0.1134,
959
  "step": 3700
960
  },
961
  {
962
  "epoch": 1.05,
963
+ "learning_rate": 5.705333333333334e-06,
964
+ "loss": 0.1134,
965
  "step": 3725
966
  },
967
  {
968
  "epoch": 1.06,
969
+ "learning_rate": 5.672000000000001e-06,
970
+ "loss": 0.1068,
971
  "step": 3750
972
  },
973
  {
974
  "epoch": 1.06,
975
+ "learning_rate": 5.6386666666666665e-06,
976
+ "loss": 0.1295,
977
  "step": 3775
978
  },
979
  {
980
  "epoch": 1.06,
981
+ "learning_rate": 5.605333333333334e-06,
982
+ "loss": 0.0968,
983
  "step": 3800
984
  },
985
  {
986
  "epoch": 1.07,
987
+ "learning_rate": 5.572000000000001e-06,
988
+ "loss": 0.1202,
989
  "step": 3825
990
  },
991
  {
992
  "epoch": 1.07,
993
+ "learning_rate": 5.538666666666667e-06,
994
+ "loss": 0.1058,
995
  "step": 3850
996
  },
997
  {
998
  "epoch": 1.07,
999
+ "learning_rate": 5.505333333333334e-06,
1000
+ "loss": 0.0992,
1001
  "step": 3875
1002
  },
1003
  {
1004
  "epoch": 1.08,
1005
+ "learning_rate": 5.472e-06,
1006
+ "loss": 0.1327,
1007
  "step": 3900
1008
  },
1009
  {
1010
  "epoch": 1.08,
1011
+ "learning_rate": 5.4386666666666676e-06,
1012
+ "loss": 0.1263,
1013
  "step": 3925
1014
  },
1015
  {
1016
  "epoch": 1.08,
1017
+ "learning_rate": 5.405333333333333e-06,
1018
+ "loss": 0.1284,
1019
  "step": 3950
1020
  },
1021
  {
1022
  "epoch": 1.08,
1023
+ "learning_rate": 5.372e-06,
1024
+ "loss": 0.1268,
1025
  "step": 3975
1026
  },
1027
  {
1028
  "epoch": 1.09,
1029
+ "learning_rate": 5.338666666666668e-06,
1030
+ "loss": 0.1008,
1031
  "step": 4000
1032
  },
1033
  {
1034
  "epoch": 1.09,
1035
+ "eval_loss": 0.2592164874076843,
1036
+ "eval_runtime": 1806.1641,
1037
+ "eval_samples_per_second": 3.649,
1038
+ "eval_steps_per_second": 0.456,
1039
+ "eval_wer": 16.393940621329335,
1040
  "step": 4000
1041
  },
1042
  {
1043
  "epoch": 1.09,
1044
+ "learning_rate": 5.305333333333333e-06,
1045
+ "loss": 0.1054,
1046
  "step": 4025
1047
  },
1048
  {
1049
  "epoch": 1.09,
1050
+ "learning_rate": 5.2720000000000005e-06,
1051
+ "loss": 0.0985,
1052
  "step": 4050
1053
  },
1054
  {
1055
  "epoch": 1.1,
1056
+ "learning_rate": 5.238666666666668e-06,
1057
+ "loss": 0.1112,
1058
  "step": 4075
1059
  },
1060
  {
1061
  "epoch": 1.1,
1062
+ "learning_rate": 5.205333333333333e-06,
1063
+ "loss": 0.117,
1064
  "step": 4100
1065
  },
1066
  {
1067
  "epoch": 1.1,
1068
+ "learning_rate": 5.172000000000001e-06,
1069
+ "loss": 0.1106,
1070
  "step": 4125
1071
  },
1072
  {
1073
  "epoch": 1.11,
1074
+ "learning_rate": 5.140000000000001e-06,
1075
+ "loss": 0.0938,
1076
  "step": 4150
1077
  },
1078
  {
1079
  "epoch": 1.11,
1080
+ "learning_rate": 5.106666666666667e-06,
1081
+ "loss": 0.1119,
1082
  "step": 4175
1083
  },
1084
  {
1085
  "epoch": 1.11,
1086
+ "learning_rate": 5.073333333333334e-06,
1087
+ "loss": 0.1204,
1088
  "step": 4200
1089
  },
1090
  {
1091
  "epoch": 1.12,
1092
+ "learning_rate": 5.04e-06,
1093
+ "loss": 0.1002,
1094
  "step": 4225
1095
  },
1096
  {
1097
  "epoch": 1.12,
1098
+ "learning_rate": 5.006666666666667e-06,
1099
+ "loss": 0.1252,
1100
  "step": 4250
1101
  },
1102
  {
1103
  "epoch": 1.12,
1104
+ "learning_rate": 4.973333333333334e-06,
1105
+ "loss": 0.1189,
1106
  "step": 4275
1107
  },
1108
  {
1109
  "epoch": 1.13,
1110
+ "learning_rate": 4.94e-06,
1111
+ "loss": 0.1079,
1112
  "step": 4300
1113
  },
1114
  {
1115
  "epoch": 1.13,
1116
+ "learning_rate": 4.9066666666666666e-06,
1117
+ "loss": 0.1053,
1118
  "step": 4325
1119
  },
1120
  {
1121
  "epoch": 1.13,
1122
+ "learning_rate": 4.873333333333334e-06,
1123
+ "loss": 0.1105,
1124
  "step": 4350
1125
  },
1126
  {
1127
  "epoch": 1.13,
1128
+ "learning_rate": 4.84e-06,
1129
+ "loss": 0.0836,
1130
  "step": 4375
1131
  },
1132
  {
1133
  "epoch": 1.14,
1134
+ "learning_rate": 4.8066666666666675e-06,
1135
+ "loss": 0.0927,
1136
  "step": 4400
1137
  },
1138
  {
1139
  "epoch": 1.14,
1140
+ "learning_rate": 4.773333333333334e-06,
1141
+ "loss": 0.0861,
1142
  "step": 4425
1143
  },
1144
  {
1145
  "epoch": 1.14,
1146
+ "learning_rate": 4.74e-06,
1147
+ "loss": 0.0705,
1148
  "step": 4450
1149
  },
1150
  {
1151
  "epoch": 1.15,
1152
+ "learning_rate": 4.706666666666667e-06,
1153
+ "loss": 0.0845,
1154
  "step": 4475
1155
  },
1156
  {
1157
  "epoch": 1.15,
1158
+ "learning_rate": 4.673333333333333e-06,
1159
+ "loss": 0.0866,
1160
  "step": 4500
1161
  },
1162
  {
1163
  "epoch": 1.15,
1164
+ "eval_loss": 0.2561035752296448,
1165
+ "eval_runtime": 1801.0109,
1166
+ "eval_samples_per_second": 3.66,
1167
+ "eval_steps_per_second": 0.458,
1168
+ "eval_wer": 15.806634533597958,
1169
  "step": 4500
1170
  },
1171
  {
1172
  "epoch": 1.15,
1173
+ "learning_rate": 4.6400000000000005e-06,
1174
+ "loss": 0.0792,
1175
  "step": 4525
1176
  },
1177
  {
1178
  "epoch": 1.16,
1179
+ "learning_rate": 4.606666666666667e-06,
1180
+ "loss": 0.0782,
1181
  "step": 4550
1182
  },
1183
  {
1184
  "epoch": 1.16,
1185
+ "learning_rate": 4.573333333333333e-06,
1186
  "loss": 0.1199,
1187
  "step": 4575
1188
  },
1189
  {
1190
  "epoch": 1.16,
1191
+ "learning_rate": 4.540000000000001e-06,
1192
+ "loss": 0.0928,
1193
  "step": 4600
1194
  },
1195
  {
1196
  "epoch": 1.17,
1197
+ "learning_rate": 4.506666666666667e-06,
1198
+ "loss": 0.0901,
1199
  "step": 4625
1200
  },
1201
  {
1202
  "epoch": 1.17,
1203
+ "learning_rate": 4.473333333333334e-06,
1204
+ "loss": 0.0739,
1205
  "step": 4650
1206
  },
1207
  {
1208
  "epoch": 1.17,
1209
+ "learning_rate": 4.440000000000001e-06,
1210
+ "loss": 0.0807,
1211
  "step": 4675
1212
  },
1213
  {
1214
  "epoch": 1.18,
1215
+ "learning_rate": 4.406666666666667e-06,
1216
+ "loss": 0.1097,
1217
  "step": 4700
1218
  },
1219
  {
1220
  "epoch": 1.18,
1221
+ "learning_rate": 4.3733333333333335e-06,
1222
+ "loss": 0.0824,
1223
  "step": 4725
1224
  },
1225
  {
1226
  "epoch": 1.18,
1227
+ "learning_rate": 4.34e-06,
1228
+ "loss": 0.0923,
1229
  "step": 4750
1230
  },
1231
  {
1232
  "epoch": 1.18,
1233
+ "learning_rate": 4.306666666666666e-06,
1234
+ "loss": 0.0891,
1235
  "step": 4775
1236
  },
1237
  {
1238
  "epoch": 1.19,
1239
+ "learning_rate": 4.273333333333334e-06,
1240
+ "loss": 0.0765,
1241
  "step": 4800
1242
  },
1243
  {
1244
  "epoch": 1.19,
1245
+ "learning_rate": 4.24e-06,
1246
+ "loss": 0.0725,
1247
  "step": 4825
1248
  },
1249
  {
1250
  "epoch": 1.19,
1251
+ "learning_rate": 4.206666666666667e-06,
1252
+ "loss": 0.0743,
1253
  "step": 4850
1254
  },
1255
  {
1256
  "epoch": 1.2,
1257
+ "learning_rate": 4.173333333333334e-06,
1258
+ "loss": 0.0888,
1259
  "step": 4875
1260
  },
1261
  {
1262
  "epoch": 1.2,
1263
+ "learning_rate": 4.14e-06,
1264
+ "loss": 0.088,
1265
  "step": 4900
1266
  },
1267
  {
1268
  "epoch": 1.2,
1269
+ "learning_rate": 4.1066666666666674e-06,
1270
+ "loss": 0.0856,
1271
  "step": 4925
1272
  },
1273
  {
1274
  "epoch": 1.21,
1275
+ "learning_rate": 4.073333333333334e-06,
1276
+ "loss": 0.0862,
1277
  "step": 4950
1278
  },
1279
  {
1280
  "epoch": 1.21,
1281
+ "learning_rate": 4.04e-06,
1282
+ "loss": 0.0986,
1283
  "step": 4975
1284
  },
1285
  {
1286
  "epoch": 1.21,
1287
+ "learning_rate": 4.006666666666667e-06,
1288
+ "loss": 0.0915,
1289
  "step": 5000
1290
  },
1291
  {
1292
  "epoch": 1.21,
1293
+ "eval_loss": 0.24113886058330536,
1294
+ "eval_runtime": 1828.0642,
1295
+ "eval_samples_per_second": 3.605,
1296
+ "eval_steps_per_second": 0.451,
1297
+ "eval_wer": 15.030985459111346,
1298
  "step": 5000
1299
  },
1300
  {
1301
  "epoch": 1.22,
1302
+ "learning_rate": 3.973333333333333e-06,
1303
+ "loss": 0.0955,
1304
  "step": 5025
1305
  },
1306
  {
1307
  "epoch": 1.22,
1308
+ "learning_rate": 3.94e-06,
1309
+ "loss": 0.0729,
1310
  "step": 5050
1311
  },
1312
  {
1313
  "epoch": 1.22,
1314
+ "learning_rate": 3.906666666666667e-06,
1315
+ "loss": 0.0799,
1316
  "step": 5075
1317
  },
1318
  {
1319
  "epoch": 1.23,
1320
+ "learning_rate": 3.873333333333333e-06,
1321
+ "loss": 0.0848,
1322
  "step": 5100
1323
  },
1324
  {
1325
  "epoch": 1.23,
1326
+ "learning_rate": 3.8400000000000005e-06,
1327
+ "loss": 0.0813,
1328
  "step": 5125
1329
  },
1330
  {
1331
  "epoch": 1.23,
1332
+ "learning_rate": 3.806666666666667e-06,
1333
+ "loss": 0.0719,
1334
  "step": 5150
1335
  },
1336
  {
1337
  "epoch": 1.23,
1338
+ "learning_rate": 3.7733333333333338e-06,
1339
+ "loss": 0.0781,
1340
  "step": 5175
1341
  },
1342
  {
1343
  "epoch": 1.24,
1344
+ "learning_rate": 3.74e-06,
1345
+ "loss": 0.0582,
1346
  "step": 5200
1347
  },
1348
  {
1349
  "epoch": 1.24,
1350
+ "learning_rate": 3.7066666666666666e-06,
1351
+ "loss": 0.0838,
1352
  "step": 5225
1353
  },
1354
  {
1355
  "epoch": 1.24,
1356
+ "learning_rate": 3.673333333333334e-06,
1357
+ "loss": 0.0721,
1358
  "step": 5250
1359
  },
1360
  {
1361
  "epoch": 1.25,
1362
+ "learning_rate": 3.6400000000000003e-06,
1363
+ "loss": 0.0672,
1364
  "step": 5275
1365
  },
1366
  {
1367
  "epoch": 1.25,
1368
+ "learning_rate": 3.606666666666667e-06,
1369
+ "loss": 0.0654,
1370
  "step": 5300
1371
  },
1372
  {
1373
  "epoch": 1.25,
1374
+ "learning_rate": 3.5733333333333336e-06,
1375
+ "loss": 0.0514,
1376
  "step": 5325
1377
  },
1378
  {
1379
  "epoch": 1.26,
1380
+ "learning_rate": 3.54e-06,
1381
+ "loss": 0.061,
1382
  "step": 5350
1383
  },
1384
  {
1385
  "epoch": 1.26,
1386
+ "learning_rate": 3.5066666666666673e-06,
1387
+ "loss": 0.0919,
1388
  "step": 5375
1389
  },
1390
  {
1391
  "epoch": 1.26,
1392
+ "learning_rate": 3.4733333333333337e-06,
1393
+ "loss": 0.0565,
1394
  "step": 5400
1395
  },
1396
  {
1397
  "epoch": 1.27,
1398
+ "learning_rate": 3.44e-06,
1399
+ "loss": 0.0758,
1400
  "step": 5425
1401
  },
1402
  {
1403
  "epoch": 1.27,
1404
+ "learning_rate": 3.406666666666667e-06,
1405
+ "loss": 0.0724,
1406
  "step": 5450
1407
  },
1408
  {
1409
  "epoch": 1.27,
1410
+ "learning_rate": 3.3733333333333334e-06,
1411
+ "loss": 0.0687,
1412
  "step": 5475
1413
  },
1414
  {
1415
  "epoch": 1.28,
1416
+ "learning_rate": 3.3400000000000006e-06,
1417
+ "loss": 0.0803,
1418
  "step": 5500
1419
  },
1420
  {
1421
  "epoch": 1.28,
1422
+ "eval_loss": 0.2330218106508255,
1423
+ "eval_runtime": 1819.3056,
1424
+ "eval_samples_per_second": 3.623,
1425
+ "eval_steps_per_second": 0.453,
1426
+ "eval_wer": 14.7616347361173,
1427
  "step": 5500
1428
  },
1429
  {
1430
  "epoch": 1.28,
1431
+ "learning_rate": 3.306666666666667e-06,
1432
+ "loss": 0.0637,
1433
  "step": 5525
1434
  },
1435
  {
1436
  "epoch": 1.28,
1437
+ "learning_rate": 3.2733333333333335e-06,
1438
+ "loss": 0.0459,
1439
  "step": 5550
1440
  },
1441
  {
1442
  "epoch": 1.28,
1443
+ "learning_rate": 3.2400000000000003e-06,
1444
+ "loss": 0.0658,
1445
  "step": 5575
1446
  },
1447
  {
1448
  "epoch": 1.29,
1449
+ "learning_rate": 3.2066666666666667e-06,
1450
+ "loss": 0.0685,
1451
  "step": 5600
1452
  },
1453
  {
1454
  "epoch": 1.29,
1455
+ "learning_rate": 3.173333333333334e-06,
1456
+ "loss": 0.0501,
1457
  "step": 5625
1458
  },
1459
  {
1460
  "epoch": 1.29,
1461
+ "learning_rate": 3.1400000000000004e-06,
1462
+ "loss": 0.066,
1463
  "step": 5650
1464
  },
1465
  {
1466
  "epoch": 1.3,
1467
+ "learning_rate": 3.106666666666667e-06,
1468
+ "loss": 0.0689,
1469
  "step": 5675
1470
  },
1471
  {
1472
  "epoch": 1.3,
1473
+ "learning_rate": 3.0733333333333337e-06,
1474
+ "loss": 0.0632,
1475
  "step": 5700
1476
  },
1477
  {
1478
  "epoch": 1.3,
1479
+ "learning_rate": 3.04e-06,
1480
+ "loss": 0.078,
1481
  "step": 5725
1482
  },
1483
  {
1484
  "epoch": 1.31,
1485
+ "learning_rate": 3.0066666666666674e-06,
1486
+ "loss": 0.0889,
1487
  "step": 5750
1488
  },
1489
  {
1490
  "epoch": 1.31,
1491
+ "learning_rate": 2.973333333333334e-06,
1492
+ "loss": 0.0808,
1493
  "step": 5775
1494
  },
1495
  {
1496
  "epoch": 1.31,
1497
+ "learning_rate": 2.9400000000000002e-06,
1498
+ "loss": 0.059,
1499
  "step": 5800
1500
  },
1501
  {
1502
  "epoch": 1.32,
1503
+ "learning_rate": 2.906666666666667e-06,
1504
+ "loss": 0.0658,
1505
  "step": 5825
1506
  },
1507
  {
1508
  "epoch": 1.32,
1509
+ "learning_rate": 2.8733333333333335e-06,
1510
+ "loss": 0.0509,
1511
  "step": 5850
1512
  },
1513
  {
1514
  "epoch": 1.32,
1515
+ "learning_rate": 2.84e-06,
1516
+ "loss": 0.0612,
1517
  "step": 5875
1518
  },
1519
  {
1520
  "epoch": 1.33,
1521
+ "learning_rate": 2.806666666666667e-06,
1522
+ "loss": 0.0515,
1523
  "step": 5900
1524
  },
1525
  {
1526
  "epoch": 1.33,
1527
+ "learning_rate": 2.7733333333333336e-06,
1528
+ "loss": 0.051,
1529
  "step": 5925
1530
  },
1531
  {
1532
  "epoch": 1.33,
1533
+ "learning_rate": 2.7400000000000004e-06,
1534
+ "loss": 0.09,
1535
  "step": 5950
1536
  },
1537
  {
1538
  "epoch": 1.33,
1539
+ "learning_rate": 2.706666666666667e-06,
1540
+ "loss": 0.0568,
1541
  "step": 5975
1542
  },
1543
  {
1544
  "epoch": 1.34,
1545
+ "learning_rate": 2.6733333333333333e-06,
1546
+ "loss": 0.0674,
1547
  "step": 6000
1548
  },
1549
  {
1550
  "epoch": 1.34,
1551
+ "eval_loss": 0.23249581456184387,
1552
+ "eval_runtime": 1802.9331,
1553
+ "eval_samples_per_second": 3.656,
1554
+ "eval_steps_per_second": 0.457,
1555
+ "eval_wer": 13.846247316618737,
1556
  "step": 6000
1557
  },
1558
  {
1559
  "epoch": 1.34,
1560
+ "learning_rate": 2.64e-06,
1561
+ "loss": 0.0397,
1562
  "step": 6025
1563
  },
1564
  {
1565
  "epoch": 1.34,
1566
+ "learning_rate": 2.606666666666667e-06,
1567
+ "loss": 0.0617,
1568
  "step": 6050
1569
  },
1570
  {
1571
  "epoch": 1.35,
1572
+ "learning_rate": 2.573333333333334e-06,
1573
+ "loss": 0.0619,
1574
  "step": 6075
1575
  },
1576
  {
1577
  "epoch": 1.35,
1578
+ "learning_rate": 2.5400000000000002e-06,
1579
+ "loss": 0.0641,
1580
  "step": 6100
1581
  },
1582
  {
1583
  "epoch": 1.35,
1584
+ "learning_rate": 2.5066666666666667e-06,
1585
+ "loss": 0.0443,
1586
  "step": 6125
1587
  },
1588
  {
1589
  "epoch": 1.36,
1590
+ "learning_rate": 2.4733333333333335e-06,
1591
+ "loss": 0.0639,
1592
  "step": 6150
1593
  },
1594
  {
1595
  "epoch": 1.36,
1596
+ "learning_rate": 2.4400000000000004e-06,
1597
+ "loss": 0.0411,
1598
  "step": 6175
1599
  },
1600
  {
1601
  "epoch": 1.36,
1602
+ "learning_rate": 2.4066666666666668e-06,
1603
+ "loss": 0.0634,
1604
  "step": 6200
1605
  },
1606
  {
1607
  "epoch": 1.37,
1608
+ "learning_rate": 2.3733333333333336e-06,
1609
+ "loss": 0.0666,
1610
  "step": 6225
1611
  },
1612
  {
1613
  "epoch": 1.37,
1614
+ "learning_rate": 2.3400000000000005e-06,
1615
+ "loss": 0.0583,
1616
  "step": 6250
1617
  },
1618
  {
1619
  "epoch": 1.37,
1620
+ "learning_rate": 2.306666666666667e-06,
1621
+ "loss": 0.0491,
1622
  "step": 6275
1623
  },
1624
  {
1625
  "epoch": 1.38,
1626
+ "learning_rate": 2.2733333333333333e-06,
1627
+ "loss": 0.0515,
1628
  "step": 6300
1629
  },
1630
  {
1631
  "epoch": 1.38,
1632
+ "learning_rate": 2.24e-06,
1633
+ "loss": 0.0758,
1634
  "step": 6325
1635
  },
1636
  {
1637
  "epoch": 1.38,
1638
+ "learning_rate": 2.206666666666667e-06,
1639
+ "loss": 0.0582,
1640
  "step": 6350
1641
  },
1642
  {
1643
  "epoch": 1.38,
1644
+ "learning_rate": 2.1733333333333334e-06,
1645
+ "loss": 0.0468,
1646
  "step": 6375
1647
  },
1648
  {
1649
  "epoch": 1.39,
1650
+ "learning_rate": 2.1413333333333336e-06,
1651
+ "loss": 0.0589,
1652
  "step": 6400
1653
  },
1654
  {
1655
  "epoch": 1.39,
1656
+ "learning_rate": 2.108e-06,
1657
+ "loss": 0.0463,
1658
  "step": 6425
1659
  },
1660
  {
1661
  "epoch": 1.39,
1662
+ "learning_rate": 2.074666666666667e-06,
1663
+ "loss": 0.0396,
1664
  "step": 6450
1665
  },
1666
  {
1667
  "epoch": 1.4,
1668
+ "learning_rate": 2.0413333333333337e-06,
1669
+ "loss": 0.0605,
1670
  "step": 6475
1671
  },
1672
  {
1673
  "epoch": 1.4,
1674
+ "learning_rate": 2.008e-06,
1675
+ "loss": 0.0679,
1676
  "step": 6500
1677
  },
1678
  {
1679
  "epoch": 1.4,
1680
+ "eval_loss": 0.22990146279335022,
1681
+ "eval_runtime": 1798.7772,
1682
  "eval_samples_per_second": 3.664,
1683
  "eval_steps_per_second": 0.458,
1684
+ "eval_wer": 13.58094698043663,
1685
  "step": 6500
1686
  },
1687
  {
1688
  "epoch": 1.4,
1689
+ "learning_rate": 1.974666666666667e-06,
1690
+ "loss": 0.0645,
1691
  "step": 6525
1692
  },
1693
  {
1694
  "epoch": 1.41,
1695
+ "learning_rate": 1.9413333333333334e-06,
1696
+ "loss": 0.0662,
1697
  "step": 6550
1698
  },
1699
  {
1700
  "epoch": 1.41,
1701
+ "learning_rate": 1.908e-06,
1702
+ "loss": 0.0646,
1703
  "step": 6575
1704
  },
1705
  {
1706
  "epoch": 2.0,
1707
+ "learning_rate": 1.8746666666666668e-06,
1708
+ "loss": 0.0518,
1709
  "step": 6600
1710
  },
1711
  {
1712
  "epoch": 2.0,
1713
+ "learning_rate": 1.8413333333333337e-06,
1714
+ "loss": 0.0731,
1715
  "step": 6625
1716
  },
1717
  {
1718
  "epoch": 2.01,
1719
+ "learning_rate": 1.808e-06,
1720
+ "loss": 0.0727,
1721
  "step": 6650
1722
  },
1723
  {
1724
  "epoch": 2.01,
1725
+ "learning_rate": 1.7746666666666667e-06,
1726
+ "loss": 0.0486,
1727
  "step": 6675
1728
  },
1729
  {
1730
  "epoch": 2.01,
1731
+ "learning_rate": 1.7413333333333336e-06,
1732
+ "loss": 0.052,
1733
  "step": 6700
1734
  },
1735
  {
1736
  "epoch": 2.02,
1737
+ "learning_rate": 1.7080000000000002e-06,
1738
+ "loss": 0.0494,
1739
  "step": 6725
1740
  },
1741
  {
1742
  "epoch": 2.02,
1743
+ "learning_rate": 1.6746666666666668e-06,
1744
+ "loss": 0.0393,
1745
  "step": 6750
1746
  },
1747
  {
1748
  "epoch": 2.02,
1749
+ "learning_rate": 1.6413333333333335e-06,
1750
+ "loss": 0.0436,
1751
  "step": 6775
1752
  },
1753
  {
1754
  "epoch": 2.03,
1755
+ "learning_rate": 1.608e-06,
1756
+ "loss": 0.0353,
1757
  "step": 6800
1758
  },
1759
  {
1760
  "epoch": 2.03,
1761
+ "learning_rate": 1.5746666666666667e-06,
1762
+ "loss": 0.0417,
1763
  "step": 6825
1764
  },
1765
  {
1766
  "epoch": 2.03,
1767
+ "learning_rate": 1.5413333333333336e-06,
1768
+ "loss": 0.0443,
1769
  "step": 6850
1770
  },
1771
  {
1772
  "epoch": 2.04,
1773
+ "learning_rate": 1.508e-06,
1774
+ "loss": 0.0409,
1775
  "step": 6875
1776
  },
1777
  {
1778
  "epoch": 2.04,
1779
+ "learning_rate": 1.4746666666666668e-06,
1780
+ "loss": 0.0454,
1781
  "step": 6900
1782
  },
1783
  {
1784
  "epoch": 2.04,
1785
+ "learning_rate": 1.4413333333333335e-06,
1786
+ "loss": 0.0391,
1787
  "step": 6925
1788
  },
1789
  {
1790
  "epoch": 2.04,
1791
+ "learning_rate": 1.4080000000000001e-06,
1792
+ "loss": 0.0411,
1793
  "step": 6950
1794
  },
1795
  {
1796
  "epoch": 2.05,
1797
+ "learning_rate": 1.3746666666666667e-06,
1798
+ "loss": 0.0278,
1799
  "step": 6975
1800
  },
1801
  {
1802
  "epoch": 2.05,
1803
+ "learning_rate": 1.3413333333333334e-06,
1804
+ "loss": 0.027,
1805
  "step": 7000
1806
  },
1807
  {
1808
  "epoch": 2.05,
1809
+ "eval_loss": 0.23039141297340393,
1810
+ "eval_runtime": 1798.6414,
1811
+ "eval_samples_per_second": 3.664,
1812
+ "eval_steps_per_second": 0.458,
1813
+ "eval_wer": 13.380452833245576,
1814
  "step": 7000
1815
  },
1816
  {
1817
  "epoch": 2.05,
1818
+ "learning_rate": 1.308e-06,
1819
+ "loss": 0.0273,
1820
  "step": 7025
1821
  },
1822
  {
1823
  "epoch": 2.06,
1824
+ "learning_rate": 1.2746666666666669e-06,
1825
+ "loss": 0.0373,
1826
  "step": 7050
1827
  },
1828
  {
1829
  "epoch": 2.06,
1830
+ "learning_rate": 1.2413333333333335e-06,
1831
+ "loss": 0.0207,
1832
  "step": 7075
1833
  },
1834
  {
1835
  "epoch": 2.06,
1836
+ "learning_rate": 1.2080000000000001e-06,
1837
+ "loss": 0.0286,
1838
  "step": 7100
1839
  },
1840
  {
1841
  "epoch": 2.07,
1842
+ "learning_rate": 1.1746666666666668e-06,
1843
+ "loss": 0.0269,
1844
  "step": 7125
1845
  },
1846
  {
1847
  "epoch": 2.07,
1848
+ "learning_rate": 1.1413333333333334e-06,
1849
+ "loss": 0.0228,
1850
  "step": 7150
1851
  },
1852
  {
1853
  "epoch": 2.07,
1854
+ "learning_rate": 1.108e-06,
1855
+ "loss": 0.028,
1856
  "step": 7175
1857
  },
1858
  {
1859
  "epoch": 2.08,
1860
+ "learning_rate": 1.0746666666666669e-06,
1861
+ "loss": 0.0377,
1862
  "step": 7200
1863
  },
1864
  {
1865
  "epoch": 2.08,
1866
+ "learning_rate": 1.0413333333333333e-06,
1867
+ "loss": 0.0228,
1868
  "step": 7225
1869
  },
1870
  {
1871
  "epoch": 2.08,
1872
+ "learning_rate": 1.0080000000000001e-06,
1873
+ "loss": 0.0327,
1874
  "step": 7250
1875
  },
1876
  {
1877
  "epoch": 2.09,
1878
+ "learning_rate": 9.746666666666668e-07,
1879
+ "loss": 0.0354,
1880
  "step": 7275
1881
  },
1882
  {
1883
  "epoch": 2.09,
1884
+ "learning_rate": 9.413333333333334e-07,
1885
+ "loss": 0.0314,
1886
  "step": 7300
1887
  },
1888
  {
1889
  "epoch": 2.09,
1890
+ "learning_rate": 9.080000000000001e-07,
1891
+ "loss": 0.0347,
1892
  "step": 7325
1893
  },
1894
  {
1895
  "epoch": 2.09,
1896
+ "learning_rate": 8.746666666666668e-07,
1897
+ "loss": 0.0266,
1898
  "step": 7350
1899
  },
1900
  {
1901
  "epoch": 2.1,
1902
+ "learning_rate": 8.413333333333334e-07,
1903
+ "loss": 0.0327,
1904
  "step": 7375
1905
  },
1906
  {
1907
  "epoch": 2.1,
1908
+ "learning_rate": 8.08e-07,
1909
+ "loss": 0.0185,
1910
  "step": 7400
1911
  },
1912
  {
1913
  "epoch": 2.1,
1914
+ "learning_rate": 7.746666666666668e-07,
1915
+ "loss": 0.0214,
1916
  "step": 7425
1917
  },
1918
  {
1919
  "epoch": 2.11,
1920
+ "learning_rate": 7.413333333333333e-07,
1921
+ "loss": 0.0317,
1922
  "step": 7450
1923
  },
1924
  {
1925
  "epoch": 2.11,
1926
+ "learning_rate": 7.08e-07,
1927
+ "loss": 0.0275,
1928
  "step": 7475
1929
  },
1930
  {
1931
  "epoch": 2.11,
1932
+ "learning_rate": 6.746666666666667e-07,
1933
+ "loss": 0.0231,
1934
  "step": 7500
1935
  },
1936
  {
1937
  "epoch": 2.11,
1938
+ "eval_loss": 0.22874309122562408,
1939
+ "eval_runtime": 1793.9081,
1940
+ "eval_samples_per_second": 3.674,
1941
+ "eval_steps_per_second": 0.459,
1942
+ "eval_wer": 12.839726193851513,
1943
  "step": 7500
1944
  },
1945
  {
1946
  "epoch": 2.12,
1947
+ "learning_rate": 6.413333333333334e-07,
1948
+ "loss": 0.0267,
1949
  "step": 7525
1950
  },
1951
  {
1952
  "epoch": 2.12,
1953
+ "learning_rate": 6.08e-07,
1954
+ "loss": 0.0251,
1955
  "step": 7550
1956
  },
1957
  {
1958
  "epoch": 2.12,
1959
+ "learning_rate": 5.746666666666667e-07,
1960
+ "loss": 0.0234,
1961
  "step": 7575
1962
  },
1963
  {
1964
  "epoch": 2.13,
1965
+ "learning_rate": 5.413333333333334e-07,
1966
+ "loss": 0.0352,
1967
  "step": 7600
1968
  },
1969
  {
1970
  "epoch": 2.13,
1971
+ "learning_rate": 5.08e-07,
1972
+ "loss": 0.0238,
1973
  "step": 7625
1974
  },
1975
  {
1976
  "epoch": 2.13,
1977
+ "learning_rate": 4.746666666666667e-07,
1978
+ "loss": 0.0245,
1979
  "step": 7650
1980
  },
1981
  {
1982
  "epoch": 2.14,
1983
+ "learning_rate": 4.413333333333333e-07,
1984
+ "loss": 0.0247,
1985
  "step": 7675
1986
  },
1987
  {
1988
  "epoch": 2.14,
1989
+ "learning_rate": 4.0800000000000005e-07,
1990
+ "loss": 0.0211,
1991
  "step": 7700
1992
  },
1993
  {
1994
  "epoch": 2.14,
1995
+ "learning_rate": 3.7466666666666674e-07,
1996
+ "loss": 0.0342,
1997
  "step": 7725
1998
  },
1999
  {
2000
  "epoch": 2.14,
2001
+ "learning_rate": 3.4133333333333337e-07,
2002
+ "loss": 0.0184,
2003
  "step": 7750
2004
  },
2005
  {
2006
  "epoch": 2.15,
2007
+ "learning_rate": 3.0800000000000006e-07,
2008
+ "loss": 0.0299,
2009
  "step": 7775
2010
  },
2011
  {
2012
  "epoch": 2.15,
2013
+ "learning_rate": 2.746666666666667e-07,
2014
+ "loss": 0.0139,
2015
  "step": 7800
2016
  },
2017
  {
2018
  "epoch": 2.15,
2019
+ "learning_rate": 2.413333333333333e-07,
2020
+ "loss": 0.0273,
2021
  "step": 7825
2022
  },
2023
  {
2024
  "epoch": 2.16,
2025
+ "learning_rate": 2.08e-07,
2026
+ "loss": 0.0201,
2027
  "step": 7850
2028
  },
2029
  {
2030
  "epoch": 2.16,
2031
+ "learning_rate": 1.7466666666666667e-07,
2032
+ "loss": 0.0237,
2033
  "step": 7875
2034
  },
2035
  {
2036
  "epoch": 2.16,
2037
+ "learning_rate": 1.4133333333333333e-07,
2038
+ "loss": 0.0227,
2039
  "step": 7900
2040
  },
2041
  {
2042
  "epoch": 2.17,
2043
+ "learning_rate": 1.0800000000000001e-07,
2044
+ "loss": 0.02,
2045
  "step": 7925
2046
  },
2047
  {
2048
  "epoch": 2.17,
2049
+ "learning_rate": 7.466666666666667e-08,
2050
+ "loss": 0.019,
2051
  "step": 7950
2052
  },
2053
  {
2054
  "epoch": 2.17,
2055
+ "learning_rate": 4.133333333333334e-08,
2056
+ "loss": 0.0285,
2057
  "step": 7975
2058
  },
2059
  {
2060
  "epoch": 2.18,
2061
+ "learning_rate": 8e-09,
2062
+ "loss": 0.0285,
2063
  "step": 8000
2064
  },
2065
  {
2066
  "epoch": 2.18,
2067
+ "eval_loss": 0.23037254810333252,
2068
+ "eval_runtime": 1820.2026,
2069
+ "eval_samples_per_second": 3.621,
2070
+ "eval_steps_per_second": 0.453,
2071
+ "eval_wer": 12.8883308355948,
2072
  "step": 8000
2073
  },
2074
  {
2075
  "epoch": 2.18,
2076
  "step": 8000,
2077
  "total_flos": 3.265527462100992e+19,
2078
+ "train_loss": 0.18796414549276233,
2079
+ "train_runtime": 39400.0429,
2080
+ "train_samples_per_second": 0.812,
2081
+ "train_steps_per_second": 0.203
2082
  }
2083
  ],
2084
  "max_steps": 8000,