xezpeleta commited on
Commit
34f406d
1 Parent(s): b43bcdc

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 2.06,
3
- "eval_loss": 0.2200811207294464,
4
- "eval_runtime": 1786.3982,
5
- "eval_samples_per_second": 3.69,
6
- "eval_steps_per_second": 0.461,
7
- "eval_wer": 13.179958686054519,
8
- "train_loss": 0.208335678424154,
9
- "train_runtime": 23034.2525,
10
- "train_samples_per_second": 1.216,
11
- "train_steps_per_second": 0.304
12
  }
 
1
  {
2
+ "epoch": 2.18,
3
+ "eval_loss": 0.23755376040935516,
4
+ "eval_runtime": 1799.3473,
5
+ "eval_samples_per_second": 3.663,
6
+ "eval_steps_per_second": 0.458,
7
+ "eval_wer": 14.119648426424725,
8
+ "train_loss": 0.191678307980299,
9
+ "train_runtime": 39548.417,
10
+ "train_samples_per_second": 0.809,
11
+ "train_steps_per_second": 0.202
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.06,
3
- "eval_loss": 0.2200811207294464,
4
- "eval_runtime": 1786.3982,
5
- "eval_samples_per_second": 3.69,
6
- "eval_steps_per_second": 0.461,
7
- "eval_wer": 13.179958686054519
8
  }
 
1
  {
2
+ "epoch": 2.18,
3
+ "eval_loss": 0.23755376040935516,
4
+ "eval_runtime": 1799.3473,
5
+ "eval_samples_per_second": 3.663,
6
+ "eval_steps_per_second": 0.458,
7
+ "eval_wer": 14.119648426424725
8
  }
runs/Jul23_22-52-08_tknadmin-System-Product-Name/events.out.tfevents.1690190849.tknadmin-System-Product-Name.112413.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e39d2a819f737dffed2cea99ea905f4ae841c5a3f843dfa0d74c39231ce0ebf
3
+ size 358
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 2.06,
3
- "train_loss": 0.208335678424154,
4
- "train_runtime": 23034.2525,
5
- "train_samples_per_second": 1.216,
6
- "train_steps_per_second": 0.304
7
  }
 
1
  {
2
+ "epoch": 2.18,
3
+ "train_loss": 0.191678307980299,
4
+ "train_runtime": 39548.417,
5
+ "train_samples_per_second": 0.809,
6
+ "train_steps_per_second": 0.202
7
  }
trainer_state.json CHANGED
@@ -1,1768 +1,2089 @@
1
  {
2
- "best_metric": 13.179958686054519,
3
- "best_model_checkpoint": "./checkpoint-7000",
4
- "epoch": 2.0582857142857143,
5
- "global_step": 7000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.0,
12
- "learning_rate": 4.6000000000000004e-07,
13
- "loss": 1.4182,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
- "learning_rate": 9.400000000000001e-07,
19
- "loss": 1.292,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
- "learning_rate": 1.44e-06,
25
- "loss": 1.0018,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.01,
30
- "learning_rate": 1.94e-06,
31
- "loss": 0.7765,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.02,
36
- "learning_rate": 2.4400000000000004e-06,
37
- "loss": 0.7103,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.02,
42
- "learning_rate": 2.9400000000000002e-06,
43
- "loss": 0.6597,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 0.03,
48
- "learning_rate": 3.44e-06,
49
- "loss": 0.6657,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.03,
54
- "learning_rate": 3.94e-06,
55
- "loss": 0.5853,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.03,
60
- "learning_rate": 4.440000000000001e-06,
61
- "loss": 0.5273,
62
  "step": 225
63
  },
64
  {
65
- "epoch": 0.04,
66
- "learning_rate": 4.94e-06,
67
- "loss": 0.5979,
68
  "step": 250
69
  },
70
  {
71
- "epoch": 0.04,
72
- "learning_rate": 5.4400000000000004e-06,
73
- "loss": 0.5861,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.04,
78
- "learning_rate": 5.94e-06,
79
- "loss": 0.5085,
80
  "step": 300
81
  },
82
  {
83
- "epoch": 0.05,
84
- "learning_rate": 6.440000000000001e-06,
85
- "loss": 0.4827,
86
  "step": 325
87
  },
88
  {
89
- "epoch": 0.05,
90
- "learning_rate": 6.9400000000000005e-06,
91
- "loss": 0.4909,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.05,
96
- "learning_rate": 7.440000000000001e-06,
97
- "loss": 0.4651,
98
  "step": 375
99
  },
100
  {
101
- "epoch": 0.06,
102
- "learning_rate": 7.94e-06,
103
- "loss": 0.494,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 0.06,
108
- "learning_rate": 8.44e-06,
109
- "loss": 0.4188,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.06,
114
- "learning_rate": 8.94e-06,
115
- "loss": 0.3849,
116
  "step": 450
117
  },
118
  {
119
- "epoch": 0.07,
120
- "learning_rate": 9.440000000000001e-06,
121
- "loss": 0.4577,
122
  "step": 475
123
  },
124
  {
125
- "epoch": 0.07,
126
- "learning_rate": 9.940000000000001e-06,
127
- "loss": 0.4415,
 
 
 
 
 
 
 
 
 
128
  "step": 500
129
  },
130
  {
131
  "epoch": 0.07,
132
- "learning_rate": 9.966153846153847e-06,
133
- "loss": 0.4614,
134
  "step": 525
135
  },
136
  {
137
- "epoch": 0.08,
138
- "learning_rate": 9.927692307692309e-06,
139
- "loss": 0.4283,
140
  "step": 550
141
  },
142
  {
143
- "epoch": 0.08,
144
- "learning_rate": 9.88923076923077e-06,
145
- "loss": 0.4486,
146
  "step": 575
147
  },
148
  {
149
- "epoch": 0.09,
150
- "learning_rate": 9.850769230769231e-06,
151
- "loss": 0.4434,
152
  "step": 600
153
  },
154
  {
155
- "epoch": 0.09,
156
- "learning_rate": 9.812307692307694e-06,
157
- "loss": 0.4245,
158
  "step": 625
159
  },
160
  {
161
- "epoch": 0.09,
162
- "learning_rate": 9.773846153846154e-06,
163
- "loss": 0.4254,
164
  "step": 650
165
  },
166
  {
167
- "epoch": 0.1,
168
- "learning_rate": 9.735384615384616e-06,
169
- "loss": 0.4143,
170
  "step": 675
171
  },
172
  {
173
- "epoch": 0.1,
174
- "learning_rate": 9.696923076923078e-06,
175
- "loss": 0.4115,
176
  "step": 700
177
  },
178
  {
179
- "epoch": 0.1,
180
- "learning_rate": 9.658461538461539e-06,
181
- "loss": 0.3656,
182
  "step": 725
183
  },
184
  {
185
- "epoch": 0.11,
186
- "learning_rate": 9.620000000000001e-06,
187
- "loss": 0.4004,
188
  "step": 750
189
  },
190
  {
191
- "epoch": 0.11,
192
- "learning_rate": 9.581538461538462e-06,
193
- "loss": 0.388,
194
  "step": 775
195
  },
196
  {
197
- "epoch": 0.11,
198
- "learning_rate": 9.543076923076924e-06,
199
- "loss": 0.3665,
200
  "step": 800
201
  },
202
  {
203
- "epoch": 0.12,
204
- "learning_rate": 9.504615384615386e-06,
205
- "loss": 0.3777,
206
  "step": 825
207
  },
208
  {
209
- "epoch": 0.12,
210
- "learning_rate": 9.466153846153846e-06,
211
- "loss": 0.4338,
212
  "step": 850
213
  },
214
  {
215
- "epoch": 0.12,
216
- "learning_rate": 9.427692307692309e-06,
217
- "loss": 0.4017,
218
  "step": 875
219
  },
220
  {
221
- "epoch": 0.13,
222
- "learning_rate": 9.38923076923077e-06,
223
- "loss": 0.3757,
224
  "step": 900
225
  },
226
  {
227
- "epoch": 0.13,
228
- "learning_rate": 9.350769230769231e-06,
229
- "loss": 0.4084,
230
  "step": 925
231
  },
232
  {
233
- "epoch": 0.14,
234
- "learning_rate": 9.312307692307693e-06,
235
- "loss": 0.3821,
236
  "step": 950
237
  },
238
  {
239
- "epoch": 0.14,
240
- "learning_rate": 9.273846153846154e-06,
241
- "loss": 0.421,
242
  "step": 975
243
  },
244
  {
245
- "epoch": 0.14,
246
- "learning_rate": 9.235384615384616e-06,
247
- "loss": 0.4203,
248
  "step": 1000
249
  },
250
  {
251
- "epoch": 0.14,
252
- "eval_loss": 0.4127572178840637,
253
- "eval_runtime": 1802.5775,
254
- "eval_samples_per_second": 3.656,
255
- "eval_steps_per_second": 0.457,
256
- "eval_wer": 28.265624367127064,
257
  "step": 1000
258
  },
259
  {
260
- "epoch": 0.15,
261
- "learning_rate": 9.196923076923078e-06,
262
- "loss": 0.4094,
263
  "step": 1025
264
  },
265
  {
266
- "epoch": 0.15,
267
- "learning_rate": 9.158461538461539e-06,
268
- "loss": 0.3674,
269
  "step": 1050
270
  },
271
  {
272
- "epoch": 0.15,
273
- "learning_rate": 9.12e-06,
274
- "loss": 0.3613,
275
  "step": 1075
276
  },
277
  {
278
- "epoch": 0.16,
279
- "learning_rate": 9.081538461538463e-06,
280
- "loss": 0.3904,
281
  "step": 1100
282
  },
283
  {
284
- "epoch": 0.16,
285
- "learning_rate": 9.043076923076923e-06,
286
- "loss": 0.3865,
287
  "step": 1125
288
  },
289
  {
290
- "epoch": 0.16,
291
- "learning_rate": 9.004615384615386e-06,
292
- "loss": 0.3677,
293
  "step": 1150
294
  },
295
  {
296
- "epoch": 0.17,
297
- "learning_rate": 8.966153846153846e-06,
298
- "loss": 0.3295,
299
  "step": 1175
300
  },
301
  {
302
- "epoch": 0.17,
303
- "learning_rate": 8.927692307692308e-06,
304
- "loss": 0.3207,
305
  "step": 1200
306
  },
307
  {
308
- "epoch": 0.17,
309
- "learning_rate": 8.88923076923077e-06,
310
- "loss": 0.2817,
311
  "step": 1225
312
  },
313
  {
314
- "epoch": 0.18,
315
- "learning_rate": 8.850769230769231e-06,
316
- "loss": 0.3218,
317
  "step": 1250
318
  },
319
  {
320
- "epoch": 0.18,
321
- "learning_rate": 8.812307692307693e-06,
322
- "loss": 0.3502,
323
  "step": 1275
324
  },
325
  {
326
- "epoch": 0.19,
327
- "learning_rate": 8.773846153846155e-06,
328
- "loss": 0.3432,
329
  "step": 1300
330
  },
331
  {
332
- "epoch": 0.19,
333
- "learning_rate": 8.735384615384616e-06,
334
- "loss": 0.3244,
335
  "step": 1325
336
  },
337
  {
338
- "epoch": 0.19,
339
- "learning_rate": 8.696923076923078e-06,
340
- "loss": 0.2607,
341
  "step": 1350
342
  },
343
  {
344
- "epoch": 0.2,
345
- "learning_rate": 8.658461538461538e-06,
346
- "loss": 0.3469,
347
  "step": 1375
348
  },
349
  {
350
- "epoch": 0.2,
351
- "learning_rate": 8.62e-06,
352
- "loss": 0.2756,
353
  "step": 1400
354
  },
355
  {
356
- "epoch": 0.2,
357
- "learning_rate": 8.581538461538463e-06,
358
- "loss": 0.294,
359
  "step": 1425
360
  },
361
  {
362
- "epoch": 0.21,
363
- "learning_rate": 8.543076923076923e-06,
364
- "loss": 0.3915,
365
  "step": 1450
366
  },
367
  {
368
- "epoch": 0.21,
369
- "learning_rate": 8.504615384615385e-06,
370
- "loss": 0.3237,
371
  "step": 1475
372
  },
373
  {
374
- "epoch": 0.21,
375
- "learning_rate": 8.466153846153847e-06,
376
- "loss": 0.2723,
377
  "step": 1500
378
  },
379
  {
380
- "epoch": 0.22,
381
- "learning_rate": 8.427692307692308e-06,
382
- "loss": 0.2917,
 
 
 
 
 
 
 
 
 
383
  "step": 1525
384
  },
385
  {
386
- "epoch": 0.22,
387
- "learning_rate": 8.38923076923077e-06,
388
- "loss": 0.2921,
389
  "step": 1550
390
  },
391
  {
392
- "epoch": 0.23,
393
- "learning_rate": 8.35076923076923e-06,
394
- "loss": 0.3724,
395
  "step": 1575
396
  },
397
  {
398
- "epoch": 0.23,
399
- "learning_rate": 8.312307692307693e-06,
400
- "loss": 0.3364,
401
  "step": 1600
402
  },
403
  {
404
- "epoch": 0.23,
405
- "learning_rate": 8.273846153846155e-06,
406
- "loss": 0.3661,
407
  "step": 1625
408
  },
409
  {
410
- "epoch": 0.24,
411
- "learning_rate": 8.235384615384615e-06,
412
- "loss": 0.3307,
413
  "step": 1650
414
  },
415
  {
416
- "epoch": 0.24,
417
- "learning_rate": 8.196923076923078e-06,
418
- "loss": 0.3201,
419
  "step": 1675
420
  },
421
  {
422
- "epoch": 0.24,
423
- "learning_rate": 8.15846153846154e-06,
424
- "loss": 0.2973,
425
  "step": 1700
426
  },
427
  {
428
- "epoch": 0.25,
429
- "learning_rate": 8.120000000000002e-06,
430
- "loss": 0.3013,
431
  "step": 1725
432
  },
433
  {
434
- "epoch": 0.25,
435
- "learning_rate": 8.081538461538462e-06,
436
- "loss": 0.3349,
437
  "step": 1750
438
  },
439
  {
440
- "epoch": 0.25,
441
- "learning_rate": 8.043076923076923e-06,
442
- "loss": 0.3244,
443
  "step": 1775
444
  },
445
  {
446
- "epoch": 0.26,
447
- "learning_rate": 8.004615384615385e-06,
448
- "loss": 0.3331,
449
  "step": 1800
450
  },
451
  {
452
- "epoch": 0.26,
453
- "learning_rate": 7.966153846153847e-06,
454
- "loss": 0.2723,
455
  "step": 1825
456
  },
457
  {
458
- "epoch": 0.26,
459
- "learning_rate": 7.927692307692308e-06,
460
- "loss": 0.316,
461
  "step": 1850
462
  },
463
  {
464
- "epoch": 0.27,
465
- "learning_rate": 7.88923076923077e-06,
466
- "loss": 0.2648,
467
  "step": 1875
468
  },
469
  {
470
- "epoch": 0.27,
471
- "learning_rate": 7.850769230769232e-06,
472
- "loss": 0.2965,
473
  "step": 1900
474
  },
475
  {
476
- "epoch": 0.28,
477
- "learning_rate": 7.812307692307694e-06,
478
- "loss": 0.256,
479
  "step": 1925
480
  },
481
  {
482
- "epoch": 0.28,
483
- "learning_rate": 7.773846153846155e-06,
484
- "loss": 0.2156,
485
  "step": 1950
486
  },
487
  {
488
- "epoch": 0.28,
489
- "learning_rate": 7.735384615384615e-06,
490
- "loss": 0.2418,
491
  "step": 1975
492
  },
493
  {
494
- "epoch": 0.29,
495
- "learning_rate": 7.696923076923077e-06,
496
- "loss": 0.2693,
497
  "step": 2000
498
  },
499
  {
500
- "epoch": 0.29,
501
- "eval_loss": 0.3239877223968506,
502
- "eval_runtime": 1817.7748,
503
- "eval_samples_per_second": 3.626,
504
  "eval_steps_per_second": 0.453,
505
- "eval_wer": 22.052330997610273,
506
  "step": 2000
507
  },
508
  {
509
- "epoch": 0.29,
510
- "learning_rate": 7.65846153846154e-06,
511
- "loss": 0.2191,
512
  "step": 2025
513
  },
514
  {
515
- "epoch": 0.29,
516
- "learning_rate": 7.620000000000001e-06,
517
- "loss": 0.2437,
518
  "step": 2050
519
  },
520
  {
521
- "epoch": 0.3,
522
- "learning_rate": 7.581538461538462e-06,
523
- "loss": 0.2519,
524
  "step": 2075
525
  },
526
  {
527
- "epoch": 0.3,
528
- "learning_rate": 7.544615384615386e-06,
529
- "loss": 0.3198,
530
  "step": 2100
531
  },
532
  {
533
- "epoch": 0.3,
534
- "learning_rate": 7.5061538461538465e-06,
535
- "loss": 0.2708,
536
  "step": 2125
537
  },
538
  {
539
- "epoch": 0.31,
540
- "learning_rate": 7.467692307692308e-06,
541
- "loss": 0.265,
542
  "step": 2150
543
  },
544
  {
545
- "epoch": 0.31,
546
- "learning_rate": 7.42923076923077e-06,
547
- "loss": 0.2935,
548
  "step": 2175
549
  },
550
  {
551
- "epoch": 0.31,
552
- "learning_rate": 7.390769230769231e-06,
553
- "loss": 0.2595,
554
  "step": 2200
555
  },
556
  {
557
- "epoch": 0.32,
558
- "learning_rate": 7.3523076923076935e-06,
559
- "loss": 0.2613,
560
  "step": 2225
561
  },
562
  {
563
- "epoch": 0.32,
564
- "learning_rate": 7.313846153846155e-06,
565
- "loss": 0.2229,
566
  "step": 2250
567
  },
568
  {
569
- "epoch": 0.33,
570
- "learning_rate": 7.275384615384616e-06,
571
- "loss": 0.2814,
572
  "step": 2275
573
  },
574
  {
575
- "epoch": 0.33,
576
- "learning_rate": 7.236923076923078e-06,
577
- "loss": 0.2681,
578
  "step": 2300
579
  },
580
  {
581
- "epoch": 0.33,
582
- "learning_rate": 7.198461538461539e-06,
583
- "loss": 0.2531,
584
  "step": 2325
585
  },
586
  {
587
- "epoch": 0.34,
588
- "learning_rate": 7.16e-06,
589
- "loss": 0.2104,
590
  "step": 2350
591
  },
592
  {
593
- "epoch": 0.34,
594
- "learning_rate": 7.121538461538462e-06,
595
- "loss": 0.2294,
596
  "step": 2375
597
  },
598
  {
599
- "epoch": 0.34,
600
- "learning_rate": 7.0830769230769235e-06,
601
- "loss": 0.2556,
602
  "step": 2400
603
  },
604
  {
605
- "epoch": 0.35,
606
- "learning_rate": 7.044615384615386e-06,
607
- "loss": 0.2185,
608
  "step": 2425
609
  },
610
  {
611
- "epoch": 0.35,
612
- "learning_rate": 7.006153846153847e-06,
613
- "loss": 0.2585,
614
  "step": 2450
615
  },
616
  {
617
- "epoch": 0.35,
618
- "learning_rate": 6.967692307692308e-06,
619
- "loss": 0.2325,
620
  "step": 2475
621
  },
622
  {
623
- "epoch": 0.36,
624
- "learning_rate": 6.9292307692307705e-06,
625
- "loss": 0.2068,
626
  "step": 2500
627
  },
628
  {
629
- "epoch": 0.36,
630
- "learning_rate": 6.890769230769231e-06,
631
- "loss": 0.2233,
 
 
 
 
 
 
 
 
 
632
  "step": 2525
633
  },
634
  {
635
- "epoch": 0.36,
636
- "learning_rate": 6.852307692307692e-06,
637
- "loss": 0.2649,
638
  "step": 2550
639
  },
640
  {
641
- "epoch": 0.37,
642
- "learning_rate": 6.8138461538461545e-06,
643
- "loss": 0.2524,
644
  "step": 2575
645
  },
646
  {
647
- "epoch": 0.37,
648
- "learning_rate": 6.775384615384616e-06,
649
- "loss": 0.2847,
650
  "step": 2600
651
  },
652
  {
653
- "epoch": 0.38,
654
- "learning_rate": 6.736923076923078e-06,
655
- "loss": 0.1954,
656
  "step": 2625
657
  },
658
  {
659
- "epoch": 0.38,
660
- "learning_rate": 6.698461538461539e-06,
661
- "loss": 0.2553,
662
  "step": 2650
663
  },
664
  {
665
- "epoch": 0.38,
666
- "learning_rate": 6.660000000000001e-06,
667
- "loss": 0.2138,
668
  "step": 2675
669
  },
670
  {
671
- "epoch": 0.39,
672
- "learning_rate": 6.621538461538463e-06,
673
- "loss": 0.2245,
674
  "step": 2700
675
  },
676
  {
677
- "epoch": 0.39,
678
- "learning_rate": 6.583076923076923e-06,
679
- "loss": 0.2137,
680
  "step": 2725
681
  },
682
  {
683
- "epoch": 0.39,
684
- "learning_rate": 6.5446153846153846e-06,
685
- "loss": 0.2558,
686
  "step": 2750
687
  },
688
  {
689
- "epoch": 0.4,
690
- "learning_rate": 6.506153846153847e-06,
691
- "loss": 0.1981,
692
  "step": 2775
693
  },
694
  {
695
- "epoch": 0.4,
696
- "learning_rate": 6.467692307692308e-06,
697
- "loss": 0.2327,
698
  "step": 2800
699
  },
700
  {
701
- "epoch": 0.4,
702
- "learning_rate": 6.42923076923077e-06,
703
- "loss": 0.219,
704
  "step": 2825
705
  },
706
  {
707
- "epoch": 0.41,
708
- "learning_rate": 6.3907692307692315e-06,
709
- "loss": 0.226,
710
  "step": 2850
711
  },
712
  {
713
- "epoch": 0.41,
714
- "learning_rate": 6.352307692307693e-06,
715
- "loss": 0.2192,
716
  "step": 2875
717
  },
718
  {
719
- "epoch": 0.41,
720
- "learning_rate": 6.313846153846155e-06,
721
- "loss": 0.2146,
722
  "step": 2900
723
  },
724
  {
725
- "epoch": 0.42,
726
- "learning_rate": 6.2753846153846155e-06,
727
- "loss": 0.2058,
728
  "step": 2925
729
  },
730
  {
731
- "epoch": 0.42,
732
- "learning_rate": 6.236923076923077e-06,
733
- "loss": 0.2369,
734
  "step": 2950
735
  },
736
  {
737
- "epoch": 0.42,
738
- "learning_rate": 6.198461538461539e-06,
739
- "loss": 0.2202,
740
  "step": 2975
741
  },
742
  {
743
- "epoch": 0.43,
744
- "learning_rate": 6.16e-06,
745
- "loss": 0.2228,
746
  "step": 3000
747
  },
748
  {
749
- "epoch": 0.43,
750
- "eval_loss": 0.27374517917633057,
751
- "eval_runtime": 1805.7724,
752
- "eval_samples_per_second": 3.65,
753
- "eval_steps_per_second": 0.456,
754
- "eval_wer": 18.14370772408765,
755
  "step": 3000
756
  },
757
  {
758
- "epoch": 0.43,
759
- "learning_rate": 6.1215384615384625e-06,
760
- "loss": 0.2029,
761
  "step": 3025
762
  },
763
  {
764
- "epoch": 0.44,
765
- "learning_rate": 6.083076923076924e-06,
766
- "loss": 0.2085,
767
  "step": 3050
768
  },
769
  {
770
- "epoch": 0.44,
771
- "learning_rate": 6.044615384615385e-06,
772
- "loss": 0.196,
773
  "step": 3075
774
  },
775
  {
776
- "epoch": 0.44,
777
- "learning_rate": 6.006153846153847e-06,
778
- "loss": 0.1999,
779
  "step": 3100
780
  },
781
  {
782
- "epoch": 0.45,
783
- "learning_rate": 5.967692307692308e-06,
784
- "loss": 0.2076,
785
  "step": 3125
786
  },
787
  {
788
- "epoch": 0.45,
789
- "learning_rate": 5.929230769230769e-06,
790
- "loss": 0.1999,
791
  "step": 3150
792
  },
793
  {
794
- "epoch": 0.45,
795
- "learning_rate": 5.890769230769231e-06,
796
- "loss": 0.2361,
797
  "step": 3175
798
  },
799
  {
800
- "epoch": 0.46,
801
- "learning_rate": 5.8523076923076926e-06,
802
- "loss": 0.2003,
803
  "step": 3200
804
  },
805
  {
806
- "epoch": 0.46,
807
- "learning_rate": 5.813846153846155e-06,
808
- "loss": 0.249,
809
  "step": 3225
810
  },
811
  {
812
- "epoch": 0.46,
813
- "learning_rate": 5.775384615384616e-06,
814
- "loss": 0.2724,
815
  "step": 3250
816
  },
817
  {
818
- "epoch": 0.47,
819
- "learning_rate": 5.736923076923077e-06,
820
- "loss": 0.2001,
821
  "step": 3275
822
  },
823
  {
824
  "epoch": 1.0,
825
- "learning_rate": 5.6984615384615395e-06,
826
- "loss": 0.2355,
827
  "step": 3300
828
  },
829
  {
830
  "epoch": 1.0,
831
- "learning_rate": 5.66e-06,
832
- "loss": 0.2297,
833
  "step": 3325
834
  },
835
  {
836
  "epoch": 1.01,
837
- "learning_rate": 5.621538461538461e-06,
838
- "loss": 0.2265,
839
  "step": 3350
840
  },
841
  {
842
  "epoch": 1.01,
843
- "learning_rate": 5.5830769230769235e-06,
844
- "loss": 0.21,
845
  "step": 3375
846
  },
847
  {
848
  "epoch": 1.01,
849
- "learning_rate": 5.544615384615385e-06,
850
- "loss": 0.1944,
851
  "step": 3400
852
  },
853
  {
854
  "epoch": 1.02,
855
- "learning_rate": 5.506153846153847e-06,
856
- "loss": 0.1844,
857
  "step": 3425
858
  },
859
  {
860
  "epoch": 1.02,
861
- "learning_rate": 5.467692307692308e-06,
862
- "loss": 0.1774,
863
  "step": 3450
864
  },
865
  {
866
- "epoch": 1.03,
867
- "learning_rate": 5.42923076923077e-06,
868
- "loss": 0.1688,
869
  "step": 3475
870
  },
871
  {
872
  "epoch": 1.03,
873
- "learning_rate": 5.390769230769232e-06,
874
- "loss": 0.1473,
875
  "step": 3500
876
  },
877
  {
878
  "epoch": 1.03,
879
- "learning_rate": 5.352307692307692e-06,
880
- "loss": 0.1743,
 
 
 
 
 
 
 
 
 
881
  "step": 3525
882
  },
883
  {
884
- "epoch": 1.04,
885
- "learning_rate": 5.313846153846154e-06,
886
- "loss": 0.1759,
887
  "step": 3550
888
  },
889
  {
890
- "epoch": 1.04,
891
- "learning_rate": 5.275384615384616e-06,
892
- "loss": 0.1507,
893
  "step": 3575
894
  },
895
  {
896
  "epoch": 1.04,
897
- "learning_rate": 5.236923076923077e-06,
898
- "loss": 0.1556,
899
  "step": 3600
900
  },
901
  {
902
- "epoch": 1.05,
903
- "learning_rate": 5.198461538461539e-06,
904
- "loss": 0.1545,
905
  "step": 3625
906
  },
907
  {
908
- "epoch": 1.05,
909
- "learning_rate": 5.1600000000000006e-06,
910
- "loss": 0.1143,
911
  "step": 3650
912
  },
913
  {
914
  "epoch": 1.05,
915
- "learning_rate": 5.121538461538462e-06,
916
- "loss": 0.1168,
917
  "step": 3675
918
  },
919
  {
920
- "epoch": 1.06,
921
- "learning_rate": 5.083076923076924e-06,
922
- "loss": 0.1095,
923
  "step": 3700
924
  },
925
  {
926
- "epoch": 1.06,
927
- "learning_rate": 5.044615384615385e-06,
928
- "loss": 0.1098,
929
  "step": 3725
930
  },
931
  {
932
  "epoch": 1.06,
933
- "learning_rate": 5.006153846153846e-06,
934
- "loss": 0.102,
935
  "step": 3750
936
  },
937
  {
938
- "epoch": 1.07,
939
- "learning_rate": 4.967692307692308e-06,
940
- "loss": 0.1263,
941
  "step": 3775
942
  },
943
  {
944
- "epoch": 1.07,
945
- "learning_rate": 4.929230769230769e-06,
946
- "loss": 0.093,
947
  "step": 3800
948
  },
949
  {
950
- "epoch": 1.08,
951
- "learning_rate": 4.8907692307692315e-06,
952
- "loss": 0.1163,
953
  "step": 3825
954
  },
955
  {
956
- "epoch": 1.08,
957
- "learning_rate": 4.852307692307693e-06,
958
- "loss": 0.1005,
959
  "step": 3850
960
  },
961
  {
962
- "epoch": 1.08,
963
- "learning_rate": 4.813846153846154e-06,
964
- "loss": 0.1036,
965
  "step": 3875
966
  },
967
  {
968
- "epoch": 1.09,
969
- "learning_rate": 4.7753846153846155e-06,
970
- "loss": 0.1289,
971
  "step": 3900
972
  },
973
  {
974
- "epoch": 1.09,
975
- "learning_rate": 4.736923076923078e-06,
976
- "loss": 0.1238,
977
  "step": 3925
978
  },
979
  {
980
- "epoch": 1.09,
981
- "learning_rate": 4.698461538461539e-06,
982
- "loss": 0.1218,
983
  "step": 3950
984
  },
985
  {
986
- "epoch": 1.1,
987
- "learning_rate": 4.66e-06,
988
- "loss": 0.1223,
989
  "step": 3975
990
  },
991
  {
992
- "epoch": 1.1,
993
- "learning_rate": 4.621538461538462e-06,
994
- "loss": 0.1002,
995
  "step": 4000
996
  },
997
  {
998
- "epoch": 1.1,
999
- "eval_loss": 0.2553846836090088,
1000
- "eval_runtime": 1819.1705,
1001
- "eval_samples_per_second": 3.623,
1002
- "eval_steps_per_second": 0.453,
1003
- "eval_wer": 16.353436753209934,
1004
  "step": 4000
1005
  },
1006
  {
1007
- "epoch": 1.1,
1008
- "learning_rate": 4.583076923076924e-06,
1009
- "loss": 0.1,
1010
  "step": 4025
1011
  },
1012
  {
1013
- "epoch": 1.11,
1014
- "learning_rate": 4.544615384615385e-06,
1015
- "loss": 0.0925,
1016
  "step": 4050
1017
  },
1018
  {
1019
- "epoch": 1.11,
1020
- "learning_rate": 4.506153846153846e-06,
1021
- "loss": 0.1064,
1022
  "step": 4075
1023
  },
1024
  {
1025
- "epoch": 1.11,
1026
- "learning_rate": 4.467692307692308e-06,
1027
- "loss": 0.1127,
1028
  "step": 4100
1029
  },
1030
  {
1031
- "epoch": 1.12,
1032
- "learning_rate": 4.42923076923077e-06,
1033
- "loss": 0.1036,
1034
  "step": 4125
1035
  },
1036
  {
1037
- "epoch": 1.12,
1038
- "learning_rate": 4.392307692307693e-06,
1039
- "loss": 0.091,
1040
  "step": 4150
1041
  },
1042
  {
1043
- "epoch": 1.13,
1044
- "learning_rate": 4.353846153846154e-06,
1045
- "loss": 0.105,
1046
  "step": 4175
1047
  },
1048
  {
1049
- "epoch": 1.13,
1050
- "learning_rate": 4.315384615384616e-06,
1051
- "loss": 0.1147,
1052
  "step": 4200
1053
  },
1054
  {
1055
- "epoch": 1.13,
1056
- "learning_rate": 4.276923076923078e-06,
1057
- "loss": 0.0897,
1058
  "step": 4225
1059
  },
1060
  {
1061
- "epoch": 1.14,
1062
- "learning_rate": 4.238461538461539e-06,
1063
- "loss": 0.1268,
1064
  "step": 4250
1065
  },
1066
  {
1067
- "epoch": 1.14,
1068
- "learning_rate": 4.2000000000000004e-06,
1069
- "loss": 0.1129,
1070
  "step": 4275
1071
  },
1072
  {
1073
- "epoch": 1.14,
1074
- "learning_rate": 4.161538461538462e-06,
1075
- "loss": 0.1041,
1076
  "step": 4300
1077
  },
1078
  {
1079
- "epoch": 1.15,
1080
- "learning_rate": 4.123076923076924e-06,
1081
- "loss": 0.1061,
1082
  "step": 4325
1083
  },
1084
  {
1085
- "epoch": 1.15,
1086
- "learning_rate": 4.084615384615385e-06,
1087
- "loss": 0.1059,
1088
  "step": 4350
1089
  },
1090
  {
1091
- "epoch": 1.15,
1092
- "learning_rate": 4.0461538461538466e-06,
1093
- "loss": 0.0818,
1094
  "step": 4375
1095
  },
1096
  {
1097
- "epoch": 1.16,
1098
- "learning_rate": 4.007692307692308e-06,
1099
- "loss": 0.087,
1100
  "step": 4400
1101
  },
1102
  {
1103
- "epoch": 1.16,
1104
- "learning_rate": 3.96923076923077e-06,
1105
- "loss": 0.0835,
1106
  "step": 4425
1107
  },
1108
  {
1109
- "epoch": 1.16,
1110
- "learning_rate": 3.930769230769231e-06,
1111
- "loss": 0.0713,
1112
  "step": 4450
1113
  },
1114
  {
1115
- "epoch": 1.17,
1116
- "learning_rate": 3.892307692307693e-06,
1117
- "loss": 0.0792,
1118
  "step": 4475
1119
  },
1120
  {
1121
- "epoch": 1.17,
1122
- "learning_rate": 3.853846153846154e-06,
1123
- "loss": 0.0799,
1124
  "step": 4500
1125
  },
1126
  {
1127
- "epoch": 1.18,
1128
- "learning_rate": 3.815384615384616e-06,
1129
- "loss": 0.0755,
 
 
 
 
 
 
 
 
 
1130
  "step": 4525
1131
  },
1132
  {
1133
- "epoch": 1.18,
1134
- "learning_rate": 3.7769230769230775e-06,
1135
- "loss": 0.0703,
1136
  "step": 4550
1137
  },
1138
  {
1139
- "epoch": 1.18,
1140
- "learning_rate": 3.7384615384615384e-06,
1141
- "loss": 0.11,
1142
  "step": 4575
1143
  },
1144
  {
1145
- "epoch": 1.19,
1146
- "learning_rate": 3.7e-06,
1147
- "loss": 0.0891,
1148
  "step": 4600
1149
  },
1150
  {
1151
- "epoch": 1.19,
1152
- "learning_rate": 3.661538461538462e-06,
1153
- "loss": 0.0757,
1154
  "step": 4625
1155
  },
1156
  {
1157
- "epoch": 1.19,
1158
- "learning_rate": 3.6230769230769236e-06,
1159
- "loss": 0.0699,
1160
  "step": 4650
1161
  },
1162
  {
1163
- "epoch": 1.2,
1164
- "learning_rate": 3.5846153846153845e-06,
1165
- "loss": 0.0839,
1166
  "step": 4675
1167
  },
1168
  {
1169
- "epoch": 1.2,
1170
- "learning_rate": 3.5461538461538463e-06,
1171
- "loss": 0.1029,
1172
  "step": 4700
1173
  },
1174
  {
1175
- "epoch": 1.2,
1176
- "learning_rate": 3.507692307692308e-06,
1177
- "loss": 0.0806,
1178
  "step": 4725
1179
  },
1180
  {
1181
- "epoch": 1.21,
1182
- "learning_rate": 3.4692307692307698e-06,
1183
- "loss": 0.0808,
1184
  "step": 4750
1185
  },
1186
  {
1187
- "epoch": 1.21,
1188
- "learning_rate": 3.4307692307692307e-06,
1189
- "loss": 0.0801,
1190
  "step": 4775
1191
  },
1192
  {
1193
- "epoch": 1.21,
1194
- "learning_rate": 3.3923076923076924e-06,
1195
- "loss": 0.0705,
1196
  "step": 4800
1197
  },
1198
  {
1199
- "epoch": 1.22,
1200
- "learning_rate": 3.353846153846154e-06,
1201
- "loss": 0.0711,
1202
  "step": 4825
1203
  },
1204
  {
1205
- "epoch": 1.22,
1206
- "learning_rate": 3.315384615384616e-06,
1207
- "loss": 0.0721,
1208
  "step": 4850
1209
  },
1210
  {
1211
- "epoch": 1.23,
1212
- "learning_rate": 3.276923076923077e-06,
1213
- "loss": 0.0806,
1214
  "step": 4875
1215
  },
1216
  {
1217
- "epoch": 1.23,
1218
- "learning_rate": 3.2384615384615385e-06,
1219
- "loss": 0.0816,
1220
  "step": 4900
1221
  },
1222
  {
1223
- "epoch": 1.23,
1224
- "learning_rate": 3.2000000000000003e-06,
1225
- "loss": 0.078,
1226
  "step": 4925
1227
  },
1228
  {
1229
- "epoch": 1.24,
1230
- "learning_rate": 3.161538461538462e-06,
1231
- "loss": 0.0858,
1232
  "step": 4950
1233
  },
1234
  {
1235
- "epoch": 1.24,
1236
- "learning_rate": 3.123076923076923e-06,
1237
- "loss": 0.0948,
1238
  "step": 4975
1239
  },
1240
  {
1241
- "epoch": 1.24,
1242
- "learning_rate": 3.0846153846153847e-06,
1243
- "loss": 0.0863,
1244
  "step": 5000
1245
  },
1246
  {
1247
- "epoch": 1.24,
1248
- "eval_loss": 0.23512449860572815,
1249
- "eval_runtime": 1865.1318,
1250
- "eval_samples_per_second": 3.534,
1251
- "eval_steps_per_second": 0.442,
1252
- "eval_wer": 14.787962250394912,
1253
  "step": 5000
1254
  },
1255
  {
1256
- "epoch": 1.25,
1257
- "learning_rate": 3.0461538461538464e-06,
1258
- "loss": 0.0899,
1259
  "step": 5025
1260
  },
1261
  {
1262
- "epoch": 1.25,
1263
- "learning_rate": 3.007692307692308e-06,
1264
- "loss": 0.064,
1265
  "step": 5050
1266
  },
1267
  {
1268
- "epoch": 1.25,
1269
- "learning_rate": 2.969230769230769e-06,
1270
- "loss": 0.0698,
1271
  "step": 5075
1272
  },
1273
  {
1274
- "epoch": 1.26,
1275
- "learning_rate": 2.930769230769231e-06,
1276
- "loss": 0.0794,
1277
  "step": 5100
1278
  },
1279
  {
1280
- "epoch": 1.26,
1281
- "learning_rate": 2.8923076923076925e-06,
1282
- "loss": 0.0758,
1283
  "step": 5125
1284
  },
1285
  {
1286
- "epoch": 1.26,
1287
- "learning_rate": 2.8538461538461543e-06,
1288
- "loss": 0.0645,
1289
  "step": 5150
1290
  },
1291
  {
1292
- "epoch": 1.27,
1293
- "learning_rate": 2.815384615384615e-06,
1294
- "loss": 0.071,
1295
  "step": 5175
1296
  },
1297
  {
1298
- "epoch": 1.27,
1299
- "learning_rate": 2.776923076923077e-06,
1300
- "loss": 0.0555,
1301
  "step": 5200
1302
  },
1303
  {
1304
- "epoch": 1.28,
1305
- "learning_rate": 2.7384615384615387e-06,
1306
- "loss": 0.0727,
1307
  "step": 5225
1308
  },
1309
  {
1310
- "epoch": 1.28,
1311
- "learning_rate": 2.7000000000000004e-06,
1312
- "loss": 0.0714,
1313
  "step": 5250
1314
  },
1315
  {
1316
- "epoch": 1.28,
1317
- "learning_rate": 2.6615384615384613e-06,
1318
- "loss": 0.0631,
1319
  "step": 5275
1320
  },
1321
  {
1322
- "epoch": 1.29,
1323
- "learning_rate": 2.623076923076923e-06,
1324
- "loss": 0.067,
1325
  "step": 5300
1326
  },
1327
  {
1328
- "epoch": 1.29,
1329
- "learning_rate": 2.584615384615385e-06,
1330
- "loss": 0.0519,
1331
  "step": 5325
1332
  },
1333
  {
1334
- "epoch": 1.29,
1335
- "learning_rate": 2.5461538461538465e-06,
1336
- "loss": 0.0629,
1337
  "step": 5350
1338
  },
1339
  {
1340
- "epoch": 1.3,
1341
- "learning_rate": 2.507692307692308e-06,
1342
- "loss": 0.0787,
1343
  "step": 5375
1344
  },
1345
  {
1346
- "epoch": 1.3,
1347
- "learning_rate": 2.469230769230769e-06,
1348
- "loss": 0.0524,
1349
  "step": 5400
1350
  },
1351
  {
1352
- "epoch": 1.3,
1353
- "learning_rate": 2.430769230769231e-06,
1354
- "loss": 0.0618,
1355
  "step": 5425
1356
  },
1357
  {
1358
- "epoch": 1.31,
1359
- "learning_rate": 2.3923076923076922e-06,
1360
- "loss": 0.0693,
1361
  "step": 5450
1362
  },
1363
  {
1364
- "epoch": 1.31,
1365
- "learning_rate": 2.353846153846154e-06,
1366
- "loss": 0.0658,
1367
  "step": 5475
1368
  },
1369
  {
1370
- "epoch": 1.31,
1371
- "learning_rate": 2.3153846153846153e-06,
1372
- "loss": 0.0764,
1373
  "step": 5500
1374
  },
1375
  {
1376
- "epoch": 1.32,
1377
- "learning_rate": 2.276923076923077e-06,
1378
- "loss": 0.0664,
 
 
 
 
 
 
 
 
 
1379
  "step": 5525
1380
  },
1381
  {
1382
- "epoch": 1.32,
1383
- "learning_rate": 2.2384615384615384e-06,
1384
- "loss": 0.045,
1385
  "step": 5550
1386
  },
1387
  {
1388
- "epoch": 1.33,
1389
- "learning_rate": 2.2e-06,
1390
- "loss": 0.065,
1391
  "step": 5575
1392
  },
1393
  {
1394
- "epoch": 1.33,
1395
- "learning_rate": 2.1615384615384614e-06,
1396
- "loss": 0.0683,
1397
  "step": 5600
1398
  },
1399
  {
1400
- "epoch": 1.33,
1401
- "learning_rate": 2.123076923076923e-06,
1402
- "loss": 0.0452,
1403
  "step": 5625
1404
  },
1405
  {
1406
- "epoch": 1.34,
1407
- "learning_rate": 2.0846153846153845e-06,
1408
- "loss": 0.0574,
1409
  "step": 5650
1410
  },
1411
  {
1412
- "epoch": 1.34,
1413
- "learning_rate": 2.0461538461538462e-06,
1414
- "loss": 0.0629,
1415
  "step": 5675
1416
  },
1417
  {
1418
- "epoch": 1.34,
1419
- "learning_rate": 2.0076923076923076e-06,
1420
- "loss": 0.0622,
1421
  "step": 5700
1422
  },
1423
  {
1424
- "epoch": 1.35,
1425
- "learning_rate": 1.9692307692307693e-06,
1426
- "loss": 0.0717,
1427
  "step": 5725
1428
  },
1429
  {
1430
- "epoch": 1.35,
1431
- "learning_rate": 1.930769230769231e-06,
1432
- "loss": 0.0792,
1433
  "step": 5750
1434
  },
1435
  {
1436
- "epoch": 1.35,
1437
- "learning_rate": 1.8923076923076924e-06,
1438
- "loss": 0.078,
1439
  "step": 5775
1440
  },
1441
  {
1442
- "epoch": 1.36,
1443
- "learning_rate": 1.8538461538461541e-06,
1444
- "loss": 0.0534,
1445
  "step": 5800
1446
  },
1447
  {
1448
- "epoch": 1.36,
1449
- "learning_rate": 1.8153846153846154e-06,
1450
- "loss": 0.0583,
1451
  "step": 5825
1452
  },
1453
  {
1454
- "epoch": 1.36,
1455
- "learning_rate": 1.7769230769230772e-06,
1456
- "loss": 0.0493,
1457
  "step": 5850
1458
  },
1459
  {
1460
- "epoch": 1.37,
1461
- "learning_rate": 1.7384615384615385e-06,
1462
- "loss": 0.0566,
1463
  "step": 5875
1464
  },
1465
  {
1466
- "epoch": 1.37,
1467
- "learning_rate": 1.7000000000000002e-06,
1468
- "loss": 0.0478,
1469
  "step": 5900
1470
  },
1471
  {
1472
- "epoch": 1.38,
1473
- "learning_rate": 1.6615384615384616e-06,
1474
- "loss": 0.0477,
1475
  "step": 5925
1476
  },
1477
  {
1478
- "epoch": 1.38,
1479
- "learning_rate": 1.6230769230769233e-06,
1480
- "loss": 0.0889,
1481
  "step": 5950
1482
  },
1483
  {
1484
- "epoch": 1.38,
1485
- "learning_rate": 1.5846153846153846e-06,
1486
- "loss": 0.0553,
1487
  "step": 5975
1488
  },
1489
  {
1490
- "epoch": 1.39,
1491
- "learning_rate": 1.5461538461538464e-06,
1492
- "loss": 0.0636,
1493
  "step": 6000
1494
  },
1495
  {
1496
- "epoch": 1.39,
1497
- "eval_loss": 0.2250746488571167,
1498
- "eval_runtime": 1815.1581,
1499
- "eval_samples_per_second": 3.631,
1500
  "eval_steps_per_second": 0.454,
1501
- "eval_wer": 13.597148527684395,
1502
  "step": 6000
1503
  },
1504
  {
1505
- "epoch": 1.39,
1506
- "learning_rate": 1.5076923076923077e-06,
1507
- "loss": 0.0399,
1508
  "step": 6025
1509
  },
1510
  {
1511
- "epoch": 1.39,
1512
- "learning_rate": 1.4692307692307694e-06,
1513
- "loss": 0.0556,
1514
  "step": 6050
1515
  },
1516
  {
1517
- "epoch": 1.4,
1518
- "learning_rate": 1.4307692307692308e-06,
1519
- "loss": 0.0568,
1520
  "step": 6075
1521
  },
1522
  {
1523
- "epoch": 1.4,
1524
- "learning_rate": 1.3923076923076925e-06,
1525
- "loss": 0.0637,
1526
  "step": 6100
1527
  },
1528
  {
1529
- "epoch": 1.4,
1530
- "learning_rate": 1.3538461538461538e-06,
1531
- "loss": 0.0391,
1532
  "step": 6125
1533
  },
1534
  {
1535
- "epoch": 1.41,
1536
- "learning_rate": 1.3153846153846156e-06,
1537
- "loss": 0.0567,
1538
  "step": 6150
1539
  },
1540
  {
1541
- "epoch": 1.41,
1542
- "learning_rate": 1.2769230769230769e-06,
1543
- "loss": 0.0456,
1544
  "step": 6175
1545
  },
1546
  {
1547
- "epoch": 1.41,
1548
- "learning_rate": 1.2384615384615386e-06,
1549
- "loss": 0.0554,
1550
  "step": 6200
1551
  },
1552
  {
1553
- "epoch": 1.42,
1554
- "learning_rate": 1.2000000000000002e-06,
1555
- "loss": 0.0638,
1556
  "step": 6225
1557
  },
1558
  {
1559
- "epoch": 1.42,
1560
- "learning_rate": 1.1615384615384617e-06,
1561
- "loss": 0.0548,
1562
  "step": 6250
1563
  },
1564
  {
1565
- "epoch": 1.43,
1566
- "learning_rate": 1.1230769230769232e-06,
1567
- "loss": 0.0488,
1568
  "step": 6275
1569
  },
1570
  {
1571
- "epoch": 1.43,
1572
- "learning_rate": 1.0846153846153848e-06,
1573
- "loss": 0.0463,
1574
  "step": 6300
1575
  },
1576
  {
1577
- "epoch": 1.43,
1578
- "learning_rate": 1.0461538461538463e-06,
1579
- "loss": 0.0616,
1580
  "step": 6325
1581
  },
1582
  {
1583
- "epoch": 1.44,
1584
- "learning_rate": 1.0076923076923078e-06,
1585
- "loss": 0.0507,
1586
  "step": 6350
1587
  },
1588
  {
1589
- "epoch": 1.44,
1590
- "learning_rate": 9.692307692307693e-07,
1591
- "loss": 0.0433,
1592
  "step": 6375
1593
  },
1594
  {
1595
- "epoch": 1.44,
1596
- "learning_rate": 9.323076923076923e-07,
1597
- "loss": 0.0565,
1598
  "step": 6400
1599
  },
1600
  {
1601
- "epoch": 1.45,
1602
- "learning_rate": 8.938461538461539e-07,
1603
- "loss": 0.0432,
1604
  "step": 6425
1605
  },
1606
  {
1607
- "epoch": 1.45,
1608
- "learning_rate": 8.553846153846154e-07,
1609
- "loss": 0.0369,
1610
  "step": 6450
1611
  },
1612
  {
1613
- "epoch": 1.45,
1614
- "learning_rate": 8.169230769230769e-07,
1615
- "loss": 0.0548,
1616
  "step": 6475
1617
  },
1618
  {
1619
- "epoch": 1.46,
1620
- "learning_rate": 7.784615384615385e-07,
1621
- "loss": 0.0622,
 
 
 
 
 
 
 
 
 
1622
  "step": 6500
1623
  },
1624
  {
1625
- "epoch": 1.46,
1626
- "learning_rate": 7.4e-07,
1627
- "loss": 0.0558,
1628
  "step": 6525
1629
  },
1630
  {
1631
- "epoch": 1.46,
1632
- "learning_rate": 7.030769230769231e-07,
1633
- "loss": 0.065,
1634
  "step": 6550
1635
  },
1636
  {
1637
- "epoch": 1.47,
1638
- "learning_rate": 6.646153846153846e-07,
1639
- "loss": 0.0607,
1640
  "step": 6575
1641
  },
1642
  {
1643
  "epoch": 2.0,
1644
- "learning_rate": 6.261538461538462e-07,
1645
- "loss": 0.0577,
1646
  "step": 6600
1647
  },
1648
  {
1649
  "epoch": 2.0,
1650
- "learning_rate": 5.876923076923077e-07,
1651
- "loss": 0.0713,
1652
  "step": 6625
1653
  },
1654
  {
1655
  "epoch": 2.01,
1656
- "learning_rate": 5.492307692307692e-07,
1657
- "loss": 0.0697,
1658
  "step": 6650
1659
  },
1660
  {
1661
  "epoch": 2.01,
1662
- "learning_rate": 5.107692307692308e-07,
1663
- "loss": 0.0452,
1664
  "step": 6675
1665
  },
1666
  {
1667
- "epoch": 2.02,
1668
- "learning_rate": 4.723076923076923e-07,
1669
- "loss": 0.0579,
1670
  "step": 6700
1671
  },
1672
  {
1673
  "epoch": 2.02,
1674
- "learning_rate": 4.3384615384615384e-07,
1675
- "loss": 0.05,
1676
  "step": 6725
1677
  },
1678
  {
1679
  "epoch": 2.02,
1680
- "learning_rate": 3.9538461538461537e-07,
1681
- "loss": 0.0397,
1682
  "step": 6750
1683
  },
1684
  {
1685
- "epoch": 2.03,
1686
- "learning_rate": 3.569230769230769e-07,
1687
- "loss": 0.0411,
1688
  "step": 6775
1689
  },
1690
  {
1691
  "epoch": 2.03,
1692
- "learning_rate": 3.184615384615385e-07,
1693
- "loss": 0.0379,
1694
  "step": 6800
1695
  },
1696
  {
1697
  "epoch": 2.03,
1698
- "learning_rate": 2.8e-07,
1699
- "loss": 0.04,
1700
  "step": 6825
1701
  },
1702
  {
1703
- "epoch": 2.04,
1704
- "learning_rate": 2.4153846153846155e-07,
1705
- "loss": 0.0393,
1706
  "step": 6850
1707
  },
1708
  {
1709
  "epoch": 2.04,
1710
- "learning_rate": 2.0307692307692308e-07,
1711
- "loss": 0.0412,
1712
  "step": 6875
1713
  },
1714
  {
1715
  "epoch": 2.04,
1716
- "learning_rate": 1.6461538461538462e-07,
1717
- "loss": 0.0479,
1718
  "step": 6900
1719
  },
1720
  {
1721
- "epoch": 2.05,
1722
- "learning_rate": 1.2615384615384617e-07,
1723
- "loss": 0.0387,
1724
  "step": 6925
1725
  },
1726
  {
1727
- "epoch": 2.05,
1728
- "learning_rate": 8.769230769230769e-08,
1729
- "loss": 0.0453,
1730
  "step": 6950
1731
  },
1732
  {
1733
  "epoch": 2.05,
1734
- "learning_rate": 4.923076923076924e-08,
1735
- "loss": 0.0255,
1736
  "step": 6975
1737
  },
1738
  {
1739
- "epoch": 2.06,
1740
- "learning_rate": 1.076923076923077e-08,
1741
- "loss": 0.0271,
1742
  "step": 7000
1743
  },
1744
  {
1745
- "epoch": 2.06,
1746
- "eval_loss": 0.2200811207294464,
1747
- "eval_runtime": 1807.6155,
1748
- "eval_samples_per_second": 3.646,
1749
- "eval_steps_per_second": 0.456,
1750
- "eval_wer": 13.179958686054519,
1751
  "step": 7000
1752
  },
 
 
 
 
 
 
1753
  {
1754
  "epoch": 2.06,
1755
- "step": 7000,
1756
- "total_flos": 2.857285499092992e+19,
1757
- "train_loss": 0.208335678424154,
1758
- "train_runtime": 23034.2525,
1759
- "train_samples_per_second": 1.216,
1760
- "train_steps_per_second": 0.304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1761
  }
1762
  ],
1763
- "max_steps": 7000,
1764
  "num_train_epochs": 9223372036854775807,
1765
- "total_flos": 2.857285499092992e+19,
1766
  "trial_name": null,
1767
  "trial_params": null
1768
  }
 
1
  {
2
+ "best_metric": 14.119648426424725,
3
+ "best_model_checkpoint": "./checkpoint-8000",
4
+ "epoch": 2.176,
5
+ "global_step": 8000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.0,
12
+ "learning_rate": 2.76e-07,
13
+ "loss": 1.4248,
14
  "step": 25
15
  },
16
  {
17
  "epoch": 0.01,
18
+ "learning_rate": 5.64e-07,
19
+ "loss": 1.3686,
20
  "step": 50
21
  },
22
  {
23
  "epoch": 0.01,
24
+ "learning_rate": 8.64e-07,
25
+ "loss": 1.1529,
26
  "step": 75
27
  },
28
  {
29
  "epoch": 0.01,
30
+ "learning_rate": 1.164e-06,
31
+ "loss": 0.8748,
32
  "step": 100
33
  },
34
  {
35
  "epoch": 0.02,
36
+ "learning_rate": 1.464e-06,
37
+ "loss": 0.7645,
38
  "step": 125
39
  },
40
  {
41
  "epoch": 0.02,
42
+ "learning_rate": 1.764e-06,
43
+ "loss": 0.71,
44
  "step": 150
45
  },
46
  {
47
+ "epoch": 0.02,
48
+ "learning_rate": 2.064e-06,
49
+ "loss": 0.7174,
50
  "step": 175
51
  },
52
  {
53
  "epoch": 0.03,
54
+ "learning_rate": 2.364e-06,
55
+ "loss": 0.6369,
56
  "step": 200
57
  },
58
  {
59
  "epoch": 0.03,
60
+ "learning_rate": 2.6640000000000002e-06,
61
+ "loss": 0.5867,
62
  "step": 225
63
  },
64
  {
65
+ "epoch": 0.03,
66
+ "learning_rate": 2.964e-06,
67
+ "loss": 0.6348,
68
  "step": 250
69
  },
70
  {
71
+ "epoch": 0.03,
72
+ "learning_rate": 3.2640000000000004e-06,
73
+ "loss": 0.6222,
74
  "step": 275
75
  },
76
  {
77
  "epoch": 0.04,
78
+ "learning_rate": 3.564e-06,
79
+ "loss": 0.5447,
80
  "step": 300
81
  },
82
  {
83
+ "epoch": 0.04,
84
+ "learning_rate": 3.864000000000001e-06,
85
+ "loss": 0.5093,
86
  "step": 325
87
  },
88
  {
89
+ "epoch": 0.04,
90
+ "learning_rate": 4.1639999999999994e-06,
91
+ "loss": 0.5134,
92
  "step": 350
93
  },
94
  {
95
  "epoch": 0.05,
96
+ "learning_rate": 4.464e-06,
97
+ "loss": 0.4913,
98
  "step": 375
99
  },
100
  {
101
+ "epoch": 0.05,
102
+ "learning_rate": 4.7640000000000005e-06,
103
+ "loss": 0.5007,
104
  "step": 400
105
  },
106
  {
107
+ "epoch": 0.05,
108
+ "learning_rate": 5.064e-06,
109
+ "loss": 0.4233,
110
  "step": 425
111
  },
112
  {
113
  "epoch": 0.06,
114
+ "learning_rate": 5.364000000000001e-06,
115
+ "loss": 0.3944,
116
  "step": 450
117
  },
118
  {
119
+ "epoch": 0.06,
120
+ "learning_rate": 5.6639999999999995e-06,
121
+ "loss": 0.4615,
122
  "step": 475
123
  },
124
  {
125
+ "epoch": 0.06,
126
+ "learning_rate": 5.964e-06,
127
+ "loss": 0.443,
128
+ "step": 500
129
+ },
130
+ {
131
+ "epoch": 0.06,
132
+ "eval_loss": 0.5036891102790833,
133
+ "eval_runtime": 1812.9009,
134
+ "eval_samples_per_second": 3.636,
135
+ "eval_steps_per_second": 0.455,
136
+ "eval_wer": 37.42962452914254,
137
  "step": 500
138
  },
139
  {
140
  "epoch": 0.07,
141
+ "learning_rate": 5.9824e-06,
142
+ "loss": 0.458,
143
  "step": 525
144
  },
145
  {
146
+ "epoch": 0.07,
147
+ "learning_rate": 5.962400000000001e-06,
148
+ "loss": 0.4264,
149
  "step": 550
150
  },
151
  {
152
+ "epoch": 0.07,
153
+ "learning_rate": 5.9424e-06,
154
+ "loss": 0.4405,
155
  "step": 575
156
  },
157
  {
158
+ "epoch": 0.07,
159
+ "learning_rate": 5.9224e-06,
160
+ "loss": 0.4451,
161
  "step": 600
162
  },
163
  {
164
+ "epoch": 0.08,
165
+ "learning_rate": 5.9024000000000004e-06,
166
+ "loss": 0.416,
167
  "step": 625
168
  },
169
  {
170
+ "epoch": 0.08,
171
+ "learning_rate": 5.882400000000001e-06,
172
+ "loss": 0.4193,
173
  "step": 650
174
  },
175
  {
176
+ "epoch": 0.08,
177
+ "learning_rate": 5.8624e-06,
178
+ "loss": 0.4029,
179
  "step": 675
180
  },
181
  {
182
+ "epoch": 0.09,
183
+ "learning_rate": 5.8424e-06,
184
+ "loss": 0.4028,
185
  "step": 700
186
  },
187
  {
188
+ "epoch": 0.09,
189
+ "learning_rate": 5.8224e-06,
190
+ "loss": 0.3613,
191
  "step": 725
192
  },
193
  {
194
+ "epoch": 0.09,
195
+ "learning_rate": 5.8024e-06,
196
+ "loss": 0.3995,
197
  "step": 750
198
  },
199
  {
200
+ "epoch": 0.1,
201
+ "learning_rate": 5.7824e-06,
202
+ "loss": 0.3865,
203
  "step": 775
204
  },
205
  {
206
+ "epoch": 0.1,
207
+ "learning_rate": 5.7624e-06,
208
+ "loss": 0.3534,
209
  "step": 800
210
  },
211
  {
212
+ "epoch": 0.1,
213
+ "learning_rate": 5.7424e-06,
214
+ "loss": 0.3741,
215
  "step": 825
216
  },
217
  {
218
+ "epoch": 0.11,
219
+ "learning_rate": 5.7224000000000005e-06,
220
+ "loss": 0.4243,
221
  "step": 850
222
  },
223
  {
224
+ "epoch": 0.11,
225
+ "learning_rate": 5.702400000000001e-06,
226
+ "loss": 0.387,
227
  "step": 875
228
  },
229
  {
230
+ "epoch": 0.11,
231
+ "learning_rate": 5.6824e-06,
232
+ "loss": 0.3726,
233
  "step": 900
234
  },
235
  {
236
+ "epoch": 0.12,
237
+ "learning_rate": 5.6624e-06,
238
+ "loss": 0.4141,
239
  "step": 925
240
  },
241
  {
242
+ "epoch": 0.12,
243
+ "learning_rate": 5.6424e-06,
244
+ "loss": 0.3553,
245
  "step": 950
246
  },
247
  {
248
+ "epoch": 0.12,
249
+ "learning_rate": 5.622400000000001e-06,
250
+ "loss": 0.4188,
251
  "step": 975
252
  },
253
  {
254
+ "epoch": 0.12,
255
+ "learning_rate": 5.6024e-06,
256
+ "loss": 0.4196,
257
  "step": 1000
258
  },
259
  {
260
+ "epoch": 0.12,
261
+ "eval_loss": 0.40096473693847656,
262
+ "eval_runtime": 1827.7488,
263
+ "eval_samples_per_second": 3.606,
264
+ "eval_steps_per_second": 0.451,
265
+ "eval_wer": 28.913686257037547,
266
  "step": 1000
267
  },
268
  {
269
+ "epoch": 0.13,
270
+ "learning_rate": 5.5824e-06,
271
+ "loss": 0.4134,
272
  "step": 1025
273
  },
274
  {
275
+ "epoch": 0.13,
276
+ "learning_rate": 5.5624e-06,
277
+ "loss": 0.3777,
278
  "step": 1050
279
  },
280
  {
281
+ "epoch": 0.13,
282
+ "learning_rate": 5.5424e-06,
283
+ "loss": 0.3565,
284
  "step": 1075
285
  },
286
  {
287
+ "epoch": 0.14,
288
+ "learning_rate": 5.5224e-06,
289
+ "loss": 0.3878,
290
  "step": 1100
291
  },
292
  {
293
+ "epoch": 0.14,
294
+ "learning_rate": 5.5024e-06,
295
+ "loss": 0.3691,
296
  "step": 1125
297
  },
298
  {
299
+ "epoch": 0.14,
300
+ "learning_rate": 5.4824e-06,
301
+ "loss": 0.3647,
302
  "step": 1150
303
  },
304
  {
305
+ "epoch": 0.15,
306
+ "learning_rate": 5.4624e-06,
307
+ "loss": 0.3352,
308
  "step": 1175
309
  },
310
  {
311
+ "epoch": 0.15,
312
+ "learning_rate": 5.442400000000001e-06,
313
+ "loss": 0.3047,
314
  "step": 1200
315
  },
316
  {
317
+ "epoch": 0.15,
318
+ "learning_rate": 5.422400000000001e-06,
319
+ "loss": 0.2753,
320
  "step": 1225
321
  },
322
  {
323
+ "epoch": 0.16,
324
+ "learning_rate": 5.4024e-06,
325
+ "loss": 0.3258,
326
  "step": 1250
327
  },
328
  {
329
+ "epoch": 0.16,
330
+ "learning_rate": 5.3824e-06,
331
+ "loss": 0.3405,
332
  "step": 1275
333
  },
334
  {
335
+ "epoch": 0.16,
336
+ "learning_rate": 5.3624000000000005e-06,
337
+ "loss": 0.3316,
338
  "step": 1300
339
  },
340
  {
341
+ "epoch": 0.17,
342
+ "learning_rate": 5.3424e-06,
343
+ "loss": 0.3187,
344
  "step": 1325
345
  },
346
  {
347
+ "epoch": 0.17,
348
+ "learning_rate": 5.3224e-06,
349
+ "loss": 0.2521,
350
  "step": 1350
351
  },
352
  {
353
+ "epoch": 0.17,
354
+ "learning_rate": 5.3024e-06,
355
+ "loss": 0.3445,
356
  "step": 1375
357
  },
358
  {
359
+ "epoch": 0.17,
360
+ "learning_rate": 5.2824e-06,
361
+ "loss": 0.2715,
362
  "step": 1400
363
  },
364
  {
365
+ "epoch": 0.18,
366
+ "learning_rate": 5.2624e-06,
367
+ "loss": 0.2951,
368
  "step": 1425
369
  },
370
  {
371
+ "epoch": 0.18,
372
+ "learning_rate": 5.2424e-06,
373
+ "loss": 0.3858,
374
  "step": 1450
375
  },
376
  {
377
+ "epoch": 0.18,
378
+ "learning_rate": 5.2224e-06,
379
+ "loss": 0.3253,
380
  "step": 1475
381
  },
382
  {
383
+ "epoch": 0.19,
384
+ "learning_rate": 5.2024e-06,
385
+ "loss": 0.2823,
386
  "step": 1500
387
  },
388
  {
389
+ "epoch": 0.19,
390
+ "eval_loss": 0.3452778458595276,
391
+ "eval_runtime": 1811.7894,
392
+ "eval_samples_per_second": 3.638,
393
+ "eval_steps_per_second": 0.455,
394
+ "eval_wer": 24.685082425371625,
395
+ "step": 1500
396
+ },
397
+ {
398
+ "epoch": 0.19,
399
+ "learning_rate": 5.1824000000000006e-06,
400
+ "loss": 0.2764,
401
  "step": 1525
402
  },
403
  {
404
+ "epoch": 0.19,
405
+ "learning_rate": 5.162400000000001e-06,
406
+ "loss": 0.2853,
407
  "step": 1550
408
  },
409
  {
410
+ "epoch": 0.2,
411
+ "learning_rate": 5.1424e-06,
412
+ "loss": 0.353,
413
  "step": 1575
414
  },
415
  {
416
+ "epoch": 0.2,
417
+ "learning_rate": 5.1224e-06,
418
+ "loss": 0.3318,
419
  "step": 1600
420
  },
421
  {
422
+ "epoch": 0.2,
423
+ "learning_rate": 5.1024000000000005e-06,
424
+ "loss": 0.3729,
425
  "step": 1625
426
  },
427
  {
428
+ "epoch": 0.21,
429
+ "learning_rate": 5.0824e-06,
430
+ "loss": 0.3314,
431
  "step": 1650
432
  },
433
  {
434
+ "epoch": 0.21,
435
+ "learning_rate": 5.0624e-06,
436
+ "loss": 0.3193,
437
  "step": 1675
438
  },
439
  {
440
+ "epoch": 0.21,
441
+ "learning_rate": 5.0424e-06,
442
+ "loss": 0.2881,
443
  "step": 1700
444
  },
445
  {
446
+ "epoch": 0.22,
447
+ "learning_rate": 5.0223999999999996e-06,
448
+ "loss": 0.2961,
449
  "step": 1725
450
  },
451
  {
452
+ "epoch": 0.22,
453
+ "learning_rate": 5.0024e-06,
454
+ "loss": 0.3286,
455
  "step": 1750
456
  },
457
  {
458
+ "epoch": 0.22,
459
+ "learning_rate": 4.9824e-06,
460
+ "loss": 0.3192,
461
  "step": 1775
462
  },
463
  {
464
+ "epoch": 0.23,
465
+ "learning_rate": 4.9624e-06,
466
+ "loss": 0.3183,
467
  "step": 1800
468
  },
469
  {
470
+ "epoch": 0.23,
471
+ "learning_rate": 4.9424e-06,
472
+ "loss": 0.2706,
473
  "step": 1825
474
  },
475
  {
476
+ "epoch": 0.23,
477
+ "learning_rate": 4.9224000000000005e-06,
478
+ "loss": 0.3048,
479
  "step": 1850
480
  },
481
  {
482
+ "epoch": 0.23,
483
+ "learning_rate": 4.902400000000001e-06,
484
+ "loss": 0.2524,
485
  "step": 1875
486
  },
487
  {
488
+ "epoch": 0.24,
489
+ "learning_rate": 4.8824e-06,
490
+ "loss": 0.2882,
491
  "step": 1900
492
  },
493
  {
494
+ "epoch": 0.24,
495
+ "learning_rate": 4.8624e-06,
496
+ "loss": 0.2466,
497
  "step": 1925
498
  },
499
  {
500
+ "epoch": 0.24,
501
+ "learning_rate": 4.8424000000000004e-06,
502
+ "loss": 0.1995,
503
  "step": 1950
504
  },
505
  {
506
+ "epoch": 0.25,
507
+ "learning_rate": 4.8224e-06,
508
+ "loss": 0.2497,
509
  "step": 1975
510
  },
511
  {
512
+ "epoch": 0.25,
513
+ "learning_rate": 4.8024e-06,
514
+ "loss": 0.2551,
515
  "step": 2000
516
  },
517
  {
518
+ "epoch": 0.25,
519
+ "eval_loss": 0.3163716495037079,
520
+ "eval_runtime": 1819.6056,
521
+ "eval_samples_per_second": 3.622,
522
  "eval_steps_per_second": 0.453,
523
+ "eval_wer": 22.57888128316254,
524
  "step": 2000
525
  },
526
  {
527
+ "epoch": 0.25,
528
+ "learning_rate": 4.7824e-06,
529
+ "loss": 0.2133,
530
  "step": 2025
531
  },
532
  {
533
+ "epoch": 0.26,
534
+ "learning_rate": 4.7623999999999995e-06,
535
+ "loss": 0.2474,
536
  "step": 2050
537
  },
538
  {
539
+ "epoch": 0.26,
540
+ "learning_rate": 4.7424e-06,
541
+ "loss": 0.2302,
542
  "step": 2075
543
  },
544
  {
545
+ "epoch": 0.26,
546
+ "learning_rate": 4.7232e-06,
547
+ "loss": 0.3048,
548
  "step": 2100
549
  },
550
  {
551
+ "epoch": 0.27,
552
+ "learning_rate": 4.7032e-06,
553
+ "loss": 0.2593,
554
  "step": 2125
555
  },
556
  {
557
+ "epoch": 0.27,
558
+ "learning_rate": 4.6832e-06,
559
+ "loss": 0.2605,
560
  "step": 2150
561
  },
562
  {
563
+ "epoch": 0.27,
564
+ "learning_rate": 4.6632000000000005e-06,
565
+ "loss": 0.2847,
566
  "step": 2175
567
  },
568
  {
569
+ "epoch": 0.28,
570
+ "learning_rate": 4.643200000000001e-06,
571
+ "loss": 0.2519,
572
  "step": 2200
573
  },
574
  {
575
+ "epoch": 0.28,
576
+ "learning_rate": 4.6232e-06,
577
+ "loss": 0.2576,
578
  "step": 2225
579
  },
580
  {
581
+ "epoch": 0.28,
582
+ "learning_rate": 4.6032e-06,
583
+ "loss": 0.2261,
584
  "step": 2250
585
  },
586
  {
587
+ "epoch": 0.28,
588
+ "learning_rate": 4.5832000000000004e-06,
589
+ "loss": 0.2777,
590
  "step": 2275
591
  },
592
  {
593
+ "epoch": 0.29,
594
+ "learning_rate": 4.5632e-06,
595
+ "loss": 0.2586,
596
  "step": 2300
597
  },
598
  {
599
+ "epoch": 0.29,
600
+ "learning_rate": 4.5432e-06,
601
+ "loss": 0.2482,
602
  "step": 2325
603
  },
604
  {
605
+ "epoch": 0.29,
606
+ "learning_rate": 4.5232e-06,
607
+ "loss": 0.2068,
608
  "step": 2350
609
  },
610
  {
611
+ "epoch": 0.3,
612
+ "learning_rate": 4.5032e-06,
613
+ "loss": 0.2333,
614
  "step": 2375
615
  },
616
  {
617
+ "epoch": 0.3,
618
+ "learning_rate": 4.4832e-06,
619
+ "loss": 0.2451,
620
  "step": 2400
621
  },
622
  {
623
+ "epoch": 0.3,
624
+ "learning_rate": 4.4632e-06,
625
+ "loss": 0.2199,
626
  "step": 2425
627
  },
628
  {
629
+ "epoch": 0.31,
630
+ "learning_rate": 4.4432e-06,
631
+ "loss": 0.2519,
632
  "step": 2450
633
  },
634
  {
635
+ "epoch": 0.31,
636
+ "learning_rate": 4.4232e-06,
637
+ "loss": 0.2314,
638
  "step": 2475
639
  },
640
  {
641
+ "epoch": 0.31,
642
+ "learning_rate": 4.4032000000000005e-06,
643
+ "loss": 0.206,
644
  "step": 2500
645
  },
646
  {
647
+ "epoch": 0.31,
648
+ "eval_loss": 0.29017817974090576,
649
+ "eval_runtime": 1826.6674,
650
+ "eval_samples_per_second": 3.608,
651
+ "eval_steps_per_second": 0.451,
652
+ "eval_wer": 19.79221515654745,
653
+ "step": 2500
654
+ },
655
+ {
656
+ "epoch": 0.32,
657
+ "learning_rate": 4.383200000000001e-06,
658
+ "loss": 0.2336,
659
  "step": 2525
660
  },
661
  {
662
+ "epoch": 0.32,
663
+ "learning_rate": 4.3632e-06,
664
+ "loss": 0.255,
665
  "step": 2550
666
  },
667
  {
668
+ "epoch": 0.32,
669
+ "learning_rate": 4.3432e-06,
670
+ "loss": 0.2523,
671
  "step": 2575
672
  },
673
  {
674
+ "epoch": 0.33,
675
+ "learning_rate": 4.3232e-06,
676
+ "loss": 0.2916,
677
  "step": 2600
678
  },
679
  {
680
+ "epoch": 0.33,
681
+ "learning_rate": 4.3032e-06,
682
+ "loss": 0.1934,
683
  "step": 2625
684
  },
685
  {
686
+ "epoch": 0.33,
687
+ "learning_rate": 4.2832e-06,
688
+ "loss": 0.245,
689
  "step": 2650
690
  },
691
  {
692
+ "epoch": 0.33,
693
+ "learning_rate": 4.2632e-06,
694
+ "loss": 0.2059,
695
  "step": 2675
696
  },
697
  {
698
+ "epoch": 0.34,
699
+ "learning_rate": 4.2432e-06,
700
+ "loss": 0.2197,
701
  "step": 2700
702
  },
703
  {
704
+ "epoch": 0.34,
705
+ "learning_rate": 4.2232e-06,
706
+ "loss": 0.2108,
707
  "step": 2725
708
  },
709
  {
710
+ "epoch": 0.34,
711
+ "learning_rate": 4.2032e-06,
712
+ "loss": 0.2485,
713
  "step": 2750
714
  },
715
  {
716
+ "epoch": 0.35,
717
+ "learning_rate": 4.1832e-06,
718
+ "loss": 0.201,
719
  "step": 2775
720
  },
721
  {
722
+ "epoch": 0.35,
723
+ "learning_rate": 4.1632e-06,
724
+ "loss": 0.2343,
725
  "step": 2800
726
  },
727
  {
728
+ "epoch": 0.35,
729
+ "learning_rate": 4.1432e-06,
730
+ "loss": 0.2088,
731
  "step": 2825
732
  },
733
  {
734
+ "epoch": 0.36,
735
+ "learning_rate": 4.123200000000001e-06,
736
+ "loss": 0.2294,
737
  "step": 2850
738
  },
739
  {
740
+ "epoch": 0.36,
741
+ "learning_rate": 4.1032e-06,
742
+ "loss": 0.2226,
743
  "step": 2875
744
  },
745
  {
746
+ "epoch": 0.36,
747
+ "learning_rate": 4.0832e-06,
748
+ "loss": 0.1993,
749
  "step": 2900
750
  },
751
  {
752
+ "epoch": 0.37,
753
+ "learning_rate": 4.0632e-06,
754
+ "loss": 0.2006,
755
  "step": 2925
756
  },
757
  {
758
+ "epoch": 0.37,
759
+ "learning_rate": 4.0432e-06,
760
+ "loss": 0.2385,
761
  "step": 2950
762
  },
763
  {
764
+ "epoch": 0.37,
765
+ "learning_rate": 4.0232e-06,
766
+ "loss": 0.2225,
767
  "step": 2975
768
  },
769
  {
770
+ "epoch": 0.38,
771
+ "learning_rate": 4.0032e-06,
772
+ "loss": 0.2327,
773
  "step": 3000
774
  },
775
  {
776
+ "epoch": 0.38,
777
+ "eval_loss": 0.2706596553325653,
778
+ "eval_runtime": 1809.8842,
779
+ "eval_samples_per_second": 3.642,
780
+ "eval_steps_per_second": 0.455,
781
+ "eval_wer": 18.935558345822027,
782
  "step": 3000
783
  },
784
  {
785
+ "epoch": 0.38,
786
+ "learning_rate": 3.9832e-06,
787
+ "loss": 0.1988,
788
  "step": 3025
789
  },
790
  {
791
+ "epoch": 0.38,
792
+ "learning_rate": 3.9632e-06,
793
+ "loss": 0.2117,
794
  "step": 3050
795
  },
796
  {
797
+ "epoch": 0.38,
798
+ "learning_rate": 3.9432e-06,
799
+ "loss": 0.1887,
800
  "step": 3075
801
  },
802
  {
803
+ "epoch": 0.39,
804
+ "learning_rate": 3.9232e-06,
805
+ "loss": 0.1975,
806
  "step": 3100
807
  },
808
  {
809
+ "epoch": 0.39,
810
+ "learning_rate": 3.9032e-06,
811
+ "loss": 0.2138,
812
  "step": 3125
813
  },
814
  {
815
+ "epoch": 0.39,
816
+ "learning_rate": 3.8832e-06,
817
+ "loss": 0.2082,
818
  "step": 3150
819
  },
820
  {
821
+ "epoch": 0.4,
822
+ "learning_rate": 3.8632000000000006e-06,
823
+ "loss": 0.2365,
824
  "step": 3175
825
  },
826
  {
827
+ "epoch": 0.4,
828
+ "learning_rate": 3.8432e-06,
829
+ "loss": 0.1919,
830
  "step": 3200
831
  },
832
  {
833
+ "epoch": 0.4,
834
+ "learning_rate": 3.8232e-06,
835
+ "loss": 0.2334,
836
  "step": 3225
837
  },
838
  {
839
+ "epoch": 0.41,
840
+ "learning_rate": 3.8032000000000003e-06,
841
+ "loss": 0.2743,
842
  "step": 3250
843
  },
844
  {
845
+ "epoch": 0.41,
846
+ "learning_rate": 3.7831999999999996e-06,
847
+ "loss": 0.1992,
848
  "step": 3275
849
  },
850
  {
851
  "epoch": 1.0,
852
+ "learning_rate": 3.7632e-06,
853
+ "loss": 0.236,
854
  "step": 3300
855
  },
856
  {
857
  "epoch": 1.0,
858
+ "learning_rate": 3.7432e-06,
859
+ "loss": 0.2389,
860
  "step": 3325
861
  },
862
  {
863
  "epoch": 1.01,
864
+ "learning_rate": 3.7232e-06,
865
+ "loss": 0.2236,
866
  "step": 3350
867
  },
868
  {
869
  "epoch": 1.01,
870
+ "learning_rate": 3.7032e-06,
871
+ "loss": 0.2187,
872
  "step": 3375
873
  },
874
  {
875
  "epoch": 1.01,
876
+ "learning_rate": 3.6832e-06,
877
+ "loss": 0.2022,
878
  "step": 3400
879
  },
880
  {
881
  "epoch": 1.02,
882
+ "learning_rate": 3.6632000000000004e-06,
883
+ "loss": 0.1943,
884
  "step": 3425
885
  },
886
  {
887
  "epoch": 1.02,
888
+ "learning_rate": 3.6431999999999997e-06,
889
+ "loss": 0.1858,
890
  "step": 3450
891
  },
892
  {
893
+ "epoch": 1.02,
894
+ "learning_rate": 3.6232e-06,
895
+ "loss": 0.1762,
896
  "step": 3475
897
  },
898
  {
899
  "epoch": 1.03,
900
+ "learning_rate": 3.6032e-06,
901
+ "loss": 0.1416,
902
  "step": 3500
903
  },
904
  {
905
  "epoch": 1.03,
906
+ "eval_loss": 0.25662127137184143,
907
+ "eval_runtime": 1825.0473,
908
+ "eval_samples_per_second": 3.611,
909
+ "eval_steps_per_second": 0.451,
910
+ "eval_wer": 17.69208959455628,
911
+ "step": 3500
912
+ },
913
+ {
914
+ "epoch": 1.03,
915
+ "learning_rate": 3.5832e-06,
916
+ "loss": 0.1777,
917
  "step": 3525
918
  },
919
  {
920
+ "epoch": 1.03,
921
+ "learning_rate": 3.5632e-06,
922
+ "loss": 0.1934,
923
  "step": 3550
924
  },
925
  {
926
+ "epoch": 1.03,
927
+ "learning_rate": 3.5432000000000002e-06,
928
+ "loss": 0.1566,
929
  "step": 3575
930
  },
931
  {
932
  "epoch": 1.04,
933
+ "learning_rate": 3.5232000000000004e-06,
934
+ "loss": 0.1635,
935
  "step": 3600
936
  },
937
  {
938
+ "epoch": 1.04,
939
+ "learning_rate": 3.5031999999999998e-06,
940
+ "loss": 0.1687,
941
  "step": 3625
942
  },
943
  {
944
+ "epoch": 1.04,
945
+ "learning_rate": 3.4832e-06,
946
+ "loss": 0.1216,
947
  "step": 3650
948
  },
949
  {
950
  "epoch": 1.05,
951
+ "learning_rate": 3.4632000000000006e-06,
952
+ "loss": 0.1197,
953
  "step": 3675
954
  },
955
  {
956
+ "epoch": 1.05,
957
+ "learning_rate": 3.4432e-06,
958
+ "loss": 0.1202,
959
  "step": 3700
960
  },
961
  {
962
+ "epoch": 1.05,
963
+ "learning_rate": 3.4232e-06,
964
+ "loss": 0.1162,
965
  "step": 3725
966
  },
967
  {
968
  "epoch": 1.06,
969
+ "learning_rate": 3.4032000000000003e-06,
970
+ "loss": 0.1157,
971
  "step": 3750
972
  },
973
  {
974
+ "epoch": 1.06,
975
+ "learning_rate": 3.3831999999999996e-06,
976
+ "loss": 0.1333,
977
  "step": 3775
978
  },
979
  {
980
+ "epoch": 1.06,
981
+ "learning_rate": 3.3632000000000003e-06,
982
+ "loss": 0.099,
983
  "step": 3800
984
  },
985
  {
986
+ "epoch": 1.07,
987
+ "learning_rate": 3.3432000000000004e-06,
988
+ "loss": 0.1247,
989
  "step": 3825
990
  },
991
  {
992
+ "epoch": 1.07,
993
+ "learning_rate": 3.3232e-06,
994
+ "loss": 0.112,
995
  "step": 3850
996
  },
997
  {
998
+ "epoch": 1.07,
999
+ "learning_rate": 3.3032e-06,
1000
+ "loss": 0.1092,
1001
  "step": 3875
1002
  },
1003
  {
1004
+ "epoch": 1.08,
1005
+ "learning_rate": 3.2832e-06,
1006
+ "loss": 0.1422,
1007
  "step": 3900
1008
  },
1009
  {
1010
+ "epoch": 1.08,
1011
+ "learning_rate": 3.2632000000000004e-06,
1012
+ "loss": 0.1294,
1013
  "step": 3925
1014
  },
1015
  {
1016
+ "epoch": 1.08,
1017
+ "learning_rate": 3.2432e-06,
1018
+ "loss": 0.1185,
1019
  "step": 3950
1020
  },
1021
  {
1022
+ "epoch": 1.08,
1023
+ "learning_rate": 3.2232000000000003e-06,
1024
+ "loss": 0.1345,
1025
  "step": 3975
1026
  },
1027
  {
1028
+ "epoch": 1.09,
1029
+ "learning_rate": 3.2032000000000005e-06,
1030
+ "loss": 0.0998,
1031
  "step": 4000
1032
  },
1033
  {
1034
+ "epoch": 1.09,
1035
+ "eval_loss": 0.25507599115371704,
1036
+ "eval_runtime": 1832.0695,
1037
+ "eval_samples_per_second": 3.598,
1038
+ "eval_steps_per_second": 0.45,
1039
+ "eval_wer": 16.821256429989067,
1040
  "step": 4000
1041
  },
1042
  {
1043
+ "epoch": 1.09,
1044
+ "learning_rate": 3.1832e-06,
1045
+ "loss": 0.1089,
1046
  "step": 4025
1047
  },
1048
  {
1049
+ "epoch": 1.09,
1050
+ "learning_rate": 3.1632e-06,
1051
+ "loss": 0.1012,
1052
  "step": 4050
1053
  },
1054
  {
1055
+ "epoch": 1.1,
1056
+ "learning_rate": 3.1432000000000002e-06,
1057
+ "loss": 0.1139,
1058
  "step": 4075
1059
  },
1060
  {
1061
+ "epoch": 1.1,
1062
+ "learning_rate": 3.1232e-06,
1063
+ "loss": 0.1186,
1064
  "step": 4100
1065
  },
1066
  {
1067
+ "epoch": 1.1,
1068
+ "learning_rate": 3.104e-06,
1069
+ "loss": 0.1189,
1070
  "step": 4125
1071
  },
1072
  {
1073
+ "epoch": 1.11,
1074
+ "learning_rate": 3.084e-06,
1075
+ "loss": 0.0963,
1076
  "step": 4150
1077
  },
1078
  {
1079
+ "epoch": 1.11,
1080
+ "learning_rate": 3.0640000000000002e-06,
1081
+ "loss": 0.1048,
1082
  "step": 4175
1083
  },
1084
  {
1085
+ "epoch": 1.11,
1086
+ "learning_rate": 3.044e-06,
1087
+ "loss": 0.1172,
1088
  "step": 4200
1089
  },
1090
  {
1091
+ "epoch": 1.12,
1092
+ "learning_rate": 3.024e-06,
1093
+ "loss": 0.1075,
1094
  "step": 4225
1095
  },
1096
  {
1097
+ "epoch": 1.12,
1098
+ "learning_rate": 3.0040000000000004e-06,
1099
+ "loss": 0.1362,
1100
  "step": 4250
1101
  },
1102
  {
1103
+ "epoch": 1.12,
1104
+ "learning_rate": 2.984e-06,
1105
+ "loss": 0.1227,
1106
  "step": 4275
1107
  },
1108
  {
1109
+ "epoch": 1.13,
1110
+ "learning_rate": 2.964e-06,
1111
+ "loss": 0.1109,
1112
  "step": 4300
1113
  },
1114
  {
1115
+ "epoch": 1.13,
1116
+ "learning_rate": 2.944e-06,
1117
+ "loss": 0.1078,
1118
  "step": 4325
1119
  },
1120
  {
1121
+ "epoch": 1.13,
1122
+ "learning_rate": 2.9240000000000003e-06,
1123
+ "loss": 0.1169,
1124
  "step": 4350
1125
  },
1126
  {
1127
+ "epoch": 1.13,
1128
+ "learning_rate": 2.904e-06,
1129
+ "loss": 0.0874,
1130
  "step": 4375
1131
  },
1132
  {
1133
+ "epoch": 1.14,
1134
+ "learning_rate": 2.8840000000000003e-06,
1135
+ "loss": 0.0942,
1136
  "step": 4400
1137
  },
1138
  {
1139
+ "epoch": 1.14,
1140
+ "learning_rate": 2.864e-06,
1141
+ "loss": 0.0842,
1142
  "step": 4425
1143
  },
1144
  {
1145
+ "epoch": 1.14,
1146
+ "learning_rate": 2.844e-06,
1147
+ "loss": 0.0776,
1148
  "step": 4450
1149
  },
1150
  {
1151
+ "epoch": 1.15,
1152
+ "learning_rate": 2.824e-06,
1153
+ "loss": 0.0973,
1154
  "step": 4475
1155
  },
1156
  {
1157
+ "epoch": 1.15,
1158
+ "learning_rate": 2.804e-06,
1159
+ "loss": 0.095,
1160
  "step": 4500
1161
  },
1162
  {
1163
+ "epoch": 1.15,
1164
+ "eval_loss": 0.2510645389556885,
1165
+ "eval_runtime": 1808.3002,
1166
+ "eval_samples_per_second": 3.645,
1167
+ "eval_steps_per_second": 0.456,
1168
+ "eval_wer": 16.389890234517395,
1169
+ "step": 4500
1170
+ },
1171
+ {
1172
+ "epoch": 1.15,
1173
+ "learning_rate": 2.7840000000000004e-06,
1174
+ "loss": 0.0869,
1175
  "step": 4525
1176
  },
1177
  {
1178
+ "epoch": 1.16,
1179
+ "learning_rate": 2.764e-06,
1180
+ "loss": 0.0864,
1181
  "step": 4550
1182
  },
1183
  {
1184
+ "epoch": 1.16,
1185
+ "learning_rate": 2.744e-06,
1186
+ "loss": 0.1199,
1187
  "step": 4575
1188
  },
1189
  {
1190
+ "epoch": 1.16,
1191
+ "learning_rate": 2.724e-06,
1192
+ "loss": 0.0972,
1193
  "step": 4600
1194
  },
1195
  {
1196
+ "epoch": 1.17,
1197
+ "learning_rate": 2.704e-06,
1198
+ "loss": 0.0819,
1199
  "step": 4625
1200
  },
1201
  {
1202
+ "epoch": 1.17,
1203
+ "learning_rate": 2.684e-06,
1204
+ "loss": 0.0767,
1205
  "step": 4650
1206
  },
1207
  {
1208
+ "epoch": 1.17,
1209
+ "learning_rate": 2.6640000000000002e-06,
1210
+ "loss": 0.088,
1211
  "step": 4675
1212
  },
1213
  {
1214
+ "epoch": 1.18,
1215
+ "learning_rate": 2.644e-06,
1216
+ "loss": 0.1127,
1217
  "step": 4700
1218
  },
1219
  {
1220
+ "epoch": 1.18,
1221
+ "learning_rate": 2.624e-06,
1222
+ "loss": 0.0818,
1223
  "step": 4725
1224
  },
1225
  {
1226
+ "epoch": 1.18,
1227
+ "learning_rate": 2.604e-06,
1228
+ "loss": 0.086,
1229
  "step": 4750
1230
  },
1231
  {
1232
+ "epoch": 1.18,
1233
+ "learning_rate": 2.5839999999999997e-06,
1234
+ "loss": 0.095,
1235
  "step": 4775
1236
  },
1237
  {
1238
+ "epoch": 1.19,
1239
+ "learning_rate": 2.564e-06,
1240
+ "loss": 0.0833,
1241
  "step": 4800
1242
  },
1243
  {
1244
+ "epoch": 1.19,
1245
+ "learning_rate": 2.544e-06,
1246
+ "loss": 0.0818,
1247
  "step": 4825
1248
  },
1249
  {
1250
+ "epoch": 1.19,
1251
+ "learning_rate": 2.5240000000000003e-06,
1252
+ "loss": 0.0801,
1253
  "step": 4850
1254
  },
1255
  {
1256
+ "epoch": 1.2,
1257
+ "learning_rate": 2.504e-06,
1258
+ "loss": 0.0969,
1259
  "step": 4875
1260
  },
1261
  {
1262
+ "epoch": 1.2,
1263
+ "learning_rate": 2.484e-06,
1264
+ "loss": 0.0883,
1265
  "step": 4900
1266
  },
1267
  {
1268
+ "epoch": 1.2,
1269
+ "learning_rate": 2.464e-06,
1270
+ "loss": 0.0877,
1271
  "step": 4925
1272
  },
1273
  {
1274
+ "epoch": 1.21,
1275
+ "learning_rate": 2.444e-06,
1276
+ "loss": 0.0964,
1277
  "step": 4950
1278
  },
1279
  {
1280
+ "epoch": 1.21,
1281
+ "learning_rate": 2.4240000000000004e-06,
1282
+ "loss": 0.107,
1283
  "step": 4975
1284
  },
1285
  {
1286
+ "epoch": 1.21,
1287
+ "learning_rate": 2.404e-06,
1288
+ "loss": 0.0971,
1289
  "step": 5000
1290
  },
1291
  {
1292
+ "epoch": 1.21,
1293
+ "eval_loss": 0.2415408343076706,
1294
+ "eval_runtime": 1804.4355,
1295
+ "eval_samples_per_second": 3.653,
1296
+ "eval_steps_per_second": 0.457,
1297
+ "eval_wer": 15.539309004009883,
1298
  "step": 5000
1299
  },
1300
  {
1301
+ "epoch": 1.22,
1302
+ "learning_rate": 2.384e-06,
1303
+ "loss": 0.1077,
1304
  "step": 5025
1305
  },
1306
  {
1307
+ "epoch": 1.22,
1308
+ "learning_rate": 2.364e-06,
1309
+ "loss": 0.0712,
1310
  "step": 5050
1311
  },
1312
  {
1313
+ "epoch": 1.22,
1314
+ "learning_rate": 2.344e-06,
1315
+ "loss": 0.0749,
1316
  "step": 5075
1317
  },
1318
  {
1319
+ "epoch": 1.23,
1320
+ "learning_rate": 2.324e-06,
1321
+ "loss": 0.0851,
1322
  "step": 5100
1323
  },
1324
  {
1325
+ "epoch": 1.23,
1326
+ "learning_rate": 2.3040000000000003e-06,
1327
+ "loss": 0.0851,
1328
  "step": 5125
1329
  },
1330
  {
1331
+ "epoch": 1.23,
1332
+ "learning_rate": 2.284e-06,
1333
+ "loss": 0.0782,
1334
  "step": 5150
1335
  },
1336
  {
1337
+ "epoch": 1.23,
1338
+ "learning_rate": 2.2640000000000003e-06,
1339
+ "loss": 0.0795,
1340
  "step": 5175
1341
  },
1342
  {
1343
+ "epoch": 1.24,
1344
+ "learning_rate": 2.244e-06,
1345
+ "loss": 0.0611,
1346
  "step": 5200
1347
  },
1348
  {
1349
+ "epoch": 1.24,
1350
+ "learning_rate": 2.224e-06,
1351
+ "loss": 0.0752,
1352
  "step": 5225
1353
  },
1354
  {
1355
+ "epoch": 1.24,
1356
+ "learning_rate": 2.204e-06,
1357
+ "loss": 0.0787,
1358
  "step": 5250
1359
  },
1360
  {
1361
+ "epoch": 1.25,
1362
+ "learning_rate": 2.184e-06,
1363
+ "loss": 0.0717,
1364
  "step": 5275
1365
  },
1366
  {
1367
+ "epoch": 1.25,
1368
+ "learning_rate": 2.1640000000000004e-06,
1369
+ "loss": 0.0748,
1370
  "step": 5300
1371
  },
1372
  {
1373
+ "epoch": 1.25,
1374
+ "learning_rate": 2.144e-06,
1375
+ "loss": 0.0586,
1376
  "step": 5325
1377
  },
1378
  {
1379
+ "epoch": 1.26,
1380
+ "learning_rate": 2.124e-06,
1381
+ "loss": 0.0719,
1382
  "step": 5350
1383
  },
1384
  {
1385
+ "epoch": 1.26,
1386
+ "learning_rate": 2.104e-06,
1387
+ "loss": 0.0868,
1388
  "step": 5375
1389
  },
1390
  {
1391
+ "epoch": 1.26,
1392
+ "learning_rate": 2.084e-06,
1393
+ "loss": 0.0631,
1394
  "step": 5400
1395
  },
1396
  {
1397
+ "epoch": 1.27,
1398
+ "learning_rate": 2.064e-06,
1399
+ "loss": 0.0767,
1400
  "step": 5425
1401
  },
1402
  {
1403
+ "epoch": 1.27,
1404
+ "learning_rate": 2.0440000000000003e-06,
1405
+ "loss": 0.0834,
1406
  "step": 5450
1407
  },
1408
  {
1409
+ "epoch": 1.27,
1410
+ "learning_rate": 2.024e-06,
1411
+ "loss": 0.0767,
1412
  "step": 5475
1413
  },
1414
  {
1415
+ "epoch": 1.28,
1416
+ "learning_rate": 2.004e-06,
1417
+ "loss": 0.0964,
1418
  "step": 5500
1419
  },
1420
  {
1421
+ "epoch": 1.28,
1422
+ "eval_loss": 0.2336428314447403,
1423
+ "eval_runtime": 1826.0414,
1424
+ "eval_samples_per_second": 3.609,
1425
+ "eval_steps_per_second": 0.451,
1426
+ "eval_wer": 15.170723804123293,
1427
+ "step": 5500
1428
+ },
1429
+ {
1430
+ "epoch": 1.28,
1431
+ "learning_rate": 1.984e-06,
1432
+ "loss": 0.0813,
1433
  "step": 5525
1434
  },
1435
  {
1436
+ "epoch": 1.28,
1437
+ "learning_rate": 1.9639999999999997e-06,
1438
+ "loss": 0.0517,
1439
  "step": 5550
1440
  },
1441
  {
1442
+ "epoch": 1.28,
1443
+ "learning_rate": 1.944e-06,
1444
+ "loss": 0.079,
1445
  "step": 5575
1446
  },
1447
  {
1448
+ "epoch": 1.29,
1449
+ "learning_rate": 1.924e-06,
1450
+ "loss": 0.0787,
1451
  "step": 5600
1452
  },
1453
  {
1454
+ "epoch": 1.29,
1455
+ "learning_rate": 1.9040000000000001e-06,
1456
+ "loss": 0.0537,
1457
  "step": 5625
1458
  },
1459
  {
1460
+ "epoch": 1.29,
1461
+ "learning_rate": 1.884e-06,
1462
+ "loss": 0.0696,
1463
  "step": 5650
1464
  },
1465
  {
1466
+ "epoch": 1.3,
1467
+ "learning_rate": 1.8639999999999999e-06,
1468
+ "loss": 0.0737,
1469
  "step": 5675
1470
  },
1471
  {
1472
+ "epoch": 1.3,
1473
+ "learning_rate": 1.844e-06,
1474
+ "loss": 0.0698,
1475
  "step": 5700
1476
  },
1477
  {
1478
+ "epoch": 1.3,
1479
+ "learning_rate": 1.824e-06,
1480
+ "loss": 0.0831,
1481
  "step": 5725
1482
  },
1483
  {
1484
+ "epoch": 1.31,
1485
+ "learning_rate": 1.8040000000000002e-06,
1486
+ "loss": 0.0919,
1487
  "step": 5750
1488
  },
1489
  {
1490
+ "epoch": 1.31,
1491
+ "learning_rate": 1.784e-06,
1492
+ "loss": 0.0846,
1493
  "step": 5775
1494
  },
1495
  {
1496
+ "epoch": 1.31,
1497
+ "learning_rate": 1.764e-06,
1498
+ "loss": 0.0653,
1499
  "step": 5800
1500
  },
1501
  {
1502
+ "epoch": 1.32,
1503
+ "learning_rate": 1.7440000000000002e-06,
1504
+ "loss": 0.0676,
1505
  "step": 5825
1506
  },
1507
  {
1508
+ "epoch": 1.32,
1509
+ "learning_rate": 1.724e-06,
1510
+ "loss": 0.0559,
1511
  "step": 5850
1512
  },
1513
  {
1514
+ "epoch": 1.32,
1515
+ "learning_rate": 1.704e-06,
1516
+ "loss": 0.0659,
1517
  "step": 5875
1518
  },
1519
  {
1520
+ "epoch": 1.33,
1521
+ "learning_rate": 1.684e-06,
1522
+ "loss": 0.0693,
1523
  "step": 5900
1524
  },
1525
  {
1526
+ "epoch": 1.33,
1527
+ "learning_rate": 1.6639999999999999e-06,
1528
+ "loss": 0.0582,
1529
  "step": 5925
1530
  },
1531
  {
1532
+ "epoch": 1.33,
1533
+ "learning_rate": 1.6440000000000003e-06,
1534
+ "loss": 0.1016,
1535
  "step": 5950
1536
  },
1537
  {
1538
+ "epoch": 1.33,
1539
+ "learning_rate": 1.624e-06,
1540
+ "loss": 0.064,
1541
  "step": 5975
1542
  },
1543
  {
1544
+ "epoch": 1.34,
1545
+ "learning_rate": 1.604e-06,
1546
+ "loss": 0.072,
1547
  "step": 6000
1548
  },
1549
  {
1550
+ "epoch": 1.34,
1551
+ "eval_loss": 0.23533816635608673,
1552
+ "eval_runtime": 1816.0058,
1553
+ "eval_samples_per_second": 3.629,
1554
  "eval_steps_per_second": 0.454,
1555
+ "eval_wer": 14.75960954271133,
1556
  "step": 6000
1557
  },
1558
  {
1559
+ "epoch": 1.34,
1560
+ "learning_rate": 1.5840000000000002e-06,
1561
+ "loss": 0.0494,
1562
  "step": 6025
1563
  },
1564
  {
1565
+ "epoch": 1.34,
1566
+ "learning_rate": 1.564e-06,
1567
+ "loss": 0.0647,
1568
  "step": 6050
1569
  },
1570
  {
1571
+ "epoch": 1.35,
1572
+ "learning_rate": 1.5440000000000002e-06,
1573
+ "loss": 0.073,
1574
  "step": 6075
1575
  },
1576
  {
1577
+ "epoch": 1.35,
1578
+ "learning_rate": 1.5240000000000001e-06,
1579
+ "loss": 0.0758,
1580
  "step": 6100
1581
  },
1582
  {
1583
+ "epoch": 1.35,
1584
+ "learning_rate": 1.504e-06,
1585
+ "loss": 0.0473,
1586
  "step": 6125
1587
  },
1588
  {
1589
+ "epoch": 1.36,
1590
+ "learning_rate": 1.484e-06,
1591
+ "loss": 0.0645,
1592
  "step": 6150
1593
  },
1594
  {
1595
+ "epoch": 1.36,
1596
+ "learning_rate": 1.464e-06,
1597
+ "loss": 0.0544,
1598
  "step": 6175
1599
  },
1600
  {
1601
+ "epoch": 1.36,
1602
+ "learning_rate": 1.444e-06,
1603
+ "loss": 0.0674,
1604
  "step": 6200
1605
  },
1606
  {
1607
+ "epoch": 1.37,
1608
+ "learning_rate": 1.424e-06,
1609
+ "loss": 0.0721,
1610
  "step": 6225
1611
  },
1612
  {
1613
+ "epoch": 1.37,
1614
+ "learning_rate": 1.404e-06,
1615
+ "loss": 0.0668,
1616
  "step": 6250
1617
  },
1618
  {
1619
+ "epoch": 1.37,
1620
+ "learning_rate": 1.384e-06,
1621
+ "loss": 0.0587,
1622
  "step": 6275
1623
  },
1624
  {
1625
+ "epoch": 1.38,
1626
+ "learning_rate": 1.364e-06,
1627
+ "loss": 0.0494,
1628
  "step": 6300
1629
  },
1630
  {
1631
+ "epoch": 1.38,
1632
+ "learning_rate": 1.344e-06,
1633
+ "loss": 0.0802,
1634
  "step": 6325
1635
  },
1636
  {
1637
+ "epoch": 1.38,
1638
+ "learning_rate": 1.3240000000000002e-06,
1639
+ "loss": 0.0636,
1640
  "step": 6350
1641
  },
1642
  {
1643
+ "epoch": 1.38,
1644
+ "learning_rate": 1.304e-06,
1645
+ "loss": 0.0499,
1646
  "step": 6375
1647
  },
1648
  {
1649
+ "epoch": 1.39,
1650
+ "learning_rate": 1.284e-06,
1651
+ "loss": 0.0666,
1652
  "step": 6400
1653
  },
1654
  {
1655
+ "epoch": 1.39,
1656
+ "learning_rate": 1.264e-06,
1657
+ "loss": 0.0508,
1658
  "step": 6425
1659
  },
1660
  {
1661
+ "epoch": 1.39,
1662
+ "learning_rate": 1.244e-06,
1663
+ "loss": 0.0472,
1664
  "step": 6450
1665
  },
1666
  {
1667
+ "epoch": 1.4,
1668
+ "learning_rate": 1.224e-06,
1669
+ "loss": 0.0643,
1670
  "step": 6475
1671
  },
1672
  {
1673
+ "epoch": 1.4,
1674
+ "learning_rate": 1.204e-06,
1675
+ "loss": 0.0658,
1676
+ "step": 6500
1677
+ },
1678
+ {
1679
+ "epoch": 1.4,
1680
+ "eval_loss": 0.23401623964309692,
1681
+ "eval_runtime": 1798.9797,
1682
+ "eval_samples_per_second": 3.664,
1683
+ "eval_steps_per_second": 0.458,
1684
+ "eval_wer": 14.676576613066548,
1685
  "step": 6500
1686
  },
1687
  {
1688
+ "epoch": 1.4,
1689
+ "learning_rate": 1.1848e-06,
1690
+ "loss": 0.0671,
1691
  "step": 6525
1692
  },
1693
  {
1694
+ "epoch": 1.41,
1695
+ "learning_rate": 1.1648e-06,
1696
+ "loss": 0.0707,
1697
  "step": 6550
1698
  },
1699
  {
1700
+ "epoch": 1.41,
1701
+ "learning_rate": 1.1448e-06,
1702
+ "loss": 0.0723,
1703
  "step": 6575
1704
  },
1705
  {
1706
  "epoch": 2.0,
1707
+ "learning_rate": 1.1248e-06,
1708
+ "loss": 0.0625,
1709
  "step": 6600
1710
  },
1711
  {
1712
  "epoch": 2.0,
1713
+ "learning_rate": 1.1048e-06,
1714
+ "loss": 0.0843,
1715
  "step": 6625
1716
  },
1717
  {
1718
  "epoch": 2.01,
1719
+ "learning_rate": 1.0848e-06,
1720
+ "loss": 0.087,
1721
  "step": 6650
1722
  },
1723
  {
1724
  "epoch": 2.01,
1725
+ "learning_rate": 1.0648e-06,
1726
+ "loss": 0.0561,
1727
  "step": 6675
1728
  },
1729
  {
1730
+ "epoch": 2.01,
1731
+ "learning_rate": 1.0448e-06,
1732
+ "loss": 0.063,
1733
  "step": 6700
1734
  },
1735
  {
1736
  "epoch": 2.02,
1737
+ "learning_rate": 1.0248000000000001e-06,
1738
+ "loss": 0.0596,
1739
  "step": 6725
1740
  },
1741
  {
1742
  "epoch": 2.02,
1743
+ "learning_rate": 1.0048e-06,
1744
+ "loss": 0.0506,
1745
  "step": 6750
1746
  },
1747
  {
1748
+ "epoch": 2.02,
1749
+ "learning_rate": 9.848e-07,
1750
+ "loss": 0.0526,
1751
  "step": 6775
1752
  },
1753
  {
1754
  "epoch": 2.03,
1755
+ "learning_rate": 9.648e-07,
1756
+ "loss": 0.0479,
1757
  "step": 6800
1758
  },
1759
  {
1760
  "epoch": 2.03,
1761
+ "learning_rate": 9.448e-07,
1762
+ "loss": 0.0494,
1763
  "step": 6825
1764
  },
1765
  {
1766
+ "epoch": 2.03,
1767
+ "learning_rate": 9.248000000000001e-07,
1768
+ "loss": 0.0564,
1769
  "step": 6850
1770
  },
1771
  {
1772
  "epoch": 2.04,
1773
+ "learning_rate": 9.048e-07,
1774
+ "loss": 0.0499,
1775
  "step": 6875
1776
  },
1777
  {
1778
  "epoch": 2.04,
1779
+ "learning_rate": 8.848e-07,
1780
+ "loss": 0.0593,
1781
  "step": 6900
1782
  },
1783
  {
1784
+ "epoch": 2.04,
1785
+ "learning_rate": 8.648000000000001e-07,
1786
+ "loss": 0.049,
1787
  "step": 6925
1788
  },
1789
  {
1790
+ "epoch": 2.04,
1791
+ "learning_rate": 8.448e-07,
1792
+ "loss": 0.0527,
1793
  "step": 6950
1794
  },
1795
  {
1796
  "epoch": 2.05,
1797
+ "learning_rate": 8.247999999999999e-07,
1798
+ "loss": 0.0283,
1799
  "step": 6975
1800
  },
1801
  {
1802
+ "epoch": 2.05,
1803
+ "learning_rate": 8.048e-07,
1804
+ "loss": 0.033,
1805
  "step": 7000
1806
  },
1807
  {
1808
+ "epoch": 2.05,
1809
+ "eval_loss": 0.2349175214767456,
1810
+ "eval_runtime": 1811.228,
1811
+ "eval_samples_per_second": 3.639,
1812
+ "eval_steps_per_second": 0.455,
1813
+ "eval_wer": 14.376847988982949,
1814
  "step": 7000
1815
  },
1816
+ {
1817
+ "epoch": 2.05,
1818
+ "learning_rate": 7.848e-07,
1819
+ "loss": 0.033,
1820
+ "step": 7025
1821
+ },
1822
  {
1823
  "epoch": 2.06,
1824
+ "learning_rate": 7.648000000000001e-07,
1825
+ "loss": 0.0397,
1826
+ "step": 7050
1827
+ },
1828
+ {
1829
+ "epoch": 2.06,
1830
+ "learning_rate": 7.448e-07,
1831
+ "loss": 0.0271,
1832
+ "step": 7075
1833
+ },
1834
+ {
1835
+ "epoch": 2.06,
1836
+ "learning_rate": 7.248e-07,
1837
+ "loss": 0.0404,
1838
+ "step": 7100
1839
+ },
1840
+ {
1841
+ "epoch": 2.07,
1842
+ "learning_rate": 7.048e-07,
1843
+ "loss": 0.0348,
1844
+ "step": 7125
1845
+ },
1846
+ {
1847
+ "epoch": 2.07,
1848
+ "learning_rate": 6.848e-07,
1849
+ "loss": 0.0279,
1850
+ "step": 7150
1851
+ },
1852
+ {
1853
+ "epoch": 2.07,
1854
+ "learning_rate": 6.648e-07,
1855
+ "loss": 0.0296,
1856
+ "step": 7175
1857
+ },
1858
+ {
1859
+ "epoch": 2.08,
1860
+ "learning_rate": 6.448000000000001e-07,
1861
+ "loss": 0.0473,
1862
+ "step": 7200
1863
+ },
1864
+ {
1865
+ "epoch": 2.08,
1866
+ "learning_rate": 6.247999999999999e-07,
1867
+ "loss": 0.0287,
1868
+ "step": 7225
1869
+ },
1870
+ {
1871
+ "epoch": 2.08,
1872
+ "learning_rate": 6.048e-07,
1873
+ "loss": 0.0377,
1874
+ "step": 7250
1875
+ },
1876
+ {
1877
+ "epoch": 2.09,
1878
+ "learning_rate": 5.848e-07,
1879
+ "loss": 0.042,
1880
+ "step": 7275
1881
+ },
1882
+ {
1883
+ "epoch": 2.09,
1884
+ "learning_rate": 5.648e-07,
1885
+ "loss": 0.0375,
1886
+ "step": 7300
1887
+ },
1888
+ {
1889
+ "epoch": 2.09,
1890
+ "learning_rate": 5.448000000000001e-07,
1891
+ "loss": 0.0367,
1892
+ "step": 7325
1893
+ },
1894
+ {
1895
+ "epoch": 2.09,
1896
+ "learning_rate": 5.248e-07,
1897
+ "loss": 0.0398,
1898
+ "step": 7350
1899
+ },
1900
+ {
1901
+ "epoch": 2.1,
1902
+ "learning_rate": 5.048e-07,
1903
+ "loss": 0.0381,
1904
+ "step": 7375
1905
+ },
1906
+ {
1907
+ "epoch": 2.1,
1908
+ "learning_rate": 4.848e-07,
1909
+ "loss": 0.0266,
1910
+ "step": 7400
1911
+ },
1912
+ {
1913
+ "epoch": 2.1,
1914
+ "learning_rate": 4.6480000000000003e-07,
1915
+ "loss": 0.0321,
1916
+ "step": 7425
1917
+ },
1918
+ {
1919
+ "epoch": 2.11,
1920
+ "learning_rate": 4.4479999999999996e-07,
1921
+ "loss": 0.0351,
1922
+ "step": 7450
1923
+ },
1924
+ {
1925
+ "epoch": 2.11,
1926
+ "learning_rate": 4.2480000000000005e-07,
1927
+ "loss": 0.0359,
1928
+ "step": 7475
1929
+ },
1930
+ {
1931
+ "epoch": 2.11,
1932
+ "learning_rate": 4.0479999999999997e-07,
1933
+ "loss": 0.0288,
1934
+ "step": 7500
1935
+ },
1936
+ {
1937
+ "epoch": 2.11,
1938
+ "eval_loss": 0.23708966374397278,
1939
+ "eval_runtime": 1812.1644,
1940
+ "eval_samples_per_second": 3.637,
1941
+ "eval_steps_per_second": 0.455,
1942
+ "eval_wer": 14.186479808821742,
1943
+ "step": 7500
1944
+ },
1945
+ {
1946
+ "epoch": 2.12,
1947
+ "learning_rate": 3.848e-07,
1948
+ "loss": 0.0363,
1949
+ "step": 7525
1950
+ },
1951
+ {
1952
+ "epoch": 2.12,
1953
+ "learning_rate": 3.648e-07,
1954
+ "loss": 0.0351,
1955
+ "step": 7550
1956
+ },
1957
+ {
1958
+ "epoch": 2.12,
1959
+ "learning_rate": 3.448e-07,
1960
+ "loss": 0.029,
1961
+ "step": 7575
1962
+ },
1963
+ {
1964
+ "epoch": 2.13,
1965
+ "learning_rate": 3.248e-07,
1966
+ "loss": 0.0381,
1967
+ "step": 7600
1968
+ },
1969
+ {
1970
+ "epoch": 2.13,
1971
+ "learning_rate": 3.048e-07,
1972
+ "loss": 0.0299,
1973
+ "step": 7625
1974
+ },
1975
+ {
1976
+ "epoch": 2.13,
1977
+ "learning_rate": 2.848e-07,
1978
+ "loss": 0.0334,
1979
+ "step": 7650
1980
+ },
1981
+ {
1982
+ "epoch": 2.14,
1983
+ "learning_rate": 2.648e-07,
1984
+ "loss": 0.0273,
1985
+ "step": 7675
1986
+ },
1987
+ {
1988
+ "epoch": 2.14,
1989
+ "learning_rate": 2.448e-07,
1990
+ "loss": 0.0357,
1991
+ "step": 7700
1992
+ },
1993
+ {
1994
+ "epoch": 2.14,
1995
+ "learning_rate": 2.2480000000000003e-07,
1996
+ "loss": 0.0386,
1997
+ "step": 7725
1998
+ },
1999
+ {
2000
+ "epoch": 2.14,
2001
+ "learning_rate": 2.048e-07,
2002
+ "loss": 0.0307,
2003
+ "step": 7750
2004
+ },
2005
+ {
2006
+ "epoch": 2.15,
2007
+ "learning_rate": 1.8480000000000001e-07,
2008
+ "loss": 0.0319,
2009
+ "step": 7775
2010
+ },
2011
+ {
2012
+ "epoch": 2.15,
2013
+ "learning_rate": 1.648e-07,
2014
+ "loss": 0.0197,
2015
+ "step": 7800
2016
+ },
2017
+ {
2018
+ "epoch": 2.15,
2019
+ "learning_rate": 1.448e-07,
2020
+ "loss": 0.0328,
2021
+ "step": 7825
2022
+ },
2023
+ {
2024
+ "epoch": 2.16,
2025
+ "learning_rate": 1.248e-07,
2026
+ "loss": 0.0317,
2027
+ "step": 7850
2028
+ },
2029
+ {
2030
+ "epoch": 2.16,
2031
+ "learning_rate": 1.048e-07,
2032
+ "loss": 0.0264,
2033
+ "step": 7875
2034
+ },
2035
+ {
2036
+ "epoch": 2.16,
2037
+ "learning_rate": 8.48e-08,
2038
+ "loss": 0.029,
2039
+ "step": 7900
2040
+ },
2041
+ {
2042
+ "epoch": 2.17,
2043
+ "learning_rate": 6.480000000000001e-08,
2044
+ "loss": 0.0224,
2045
+ "step": 7925
2046
+ },
2047
+ {
2048
+ "epoch": 2.17,
2049
+ "learning_rate": 4.48e-08,
2050
+ "loss": 0.0297,
2051
+ "step": 7950
2052
+ },
2053
+ {
2054
+ "epoch": 2.17,
2055
+ "learning_rate": 2.48e-08,
2056
+ "loss": 0.0337,
2057
+ "step": 7975
2058
+ },
2059
+ {
2060
+ "epoch": 2.18,
2061
+ "learning_rate": 4.8e-09,
2062
+ "loss": 0.0352,
2063
+ "step": 8000
2064
+ },
2065
+ {
2066
+ "epoch": 2.18,
2067
+ "eval_loss": 0.23755376040935516,
2068
+ "eval_runtime": 1811.3928,
2069
+ "eval_samples_per_second": 3.639,
2070
+ "eval_steps_per_second": 0.455,
2071
+ "eval_wer": 14.119648426424725,
2072
+ "step": 8000
2073
+ },
2074
+ {
2075
+ "epoch": 2.18,
2076
+ "step": 8000,
2077
+ "total_flos": 3.265527462100992e+19,
2078
+ "train_loss": 0.191678307980299,
2079
+ "train_runtime": 39548.417,
2080
+ "train_samples_per_second": 0.809,
2081
+ "train_steps_per_second": 0.202
2082
  }
2083
  ],
2084
+ "max_steps": 8000,
2085
  "num_train_epochs": 9223372036854775807,
2086
+ "total_flos": 3.265527462100992e+19,
2087
  "trial_name": null,
2088
  "trial_params": null
2089
  }