steja committed on
Commit
3b643b7
1 Parent(s): 4c17a7b

End of training

Browse files
Files changed (5) hide show
  1. README.md +10 -9
  2. all_results.json +10 -10
  3. eval_results.json +6 -6
  4. train_results.json +5 -5
  5. trainer_state.json +279 -279
README.md CHANGED
@@ -2,38 +2,39 @@
2
  license: apache-2.0
3
  base_model: openai/whisper-small
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - fleurs
8
  metrics:
9
  - wer
10
  model-index:
11
- - name: whisper-small-shona
12
  results:
13
  - task:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
- name: fleurs
18
- type: fleurs
19
  config: sn_zw
20
  split: test
21
  args: sn_zw
22
  metrics:
23
  - name: Wer
24
  type: wer
25
- value: 50.04219409282701
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
29
  should probably proofread and complete it, then remove this comment. -->
30
 
31
- # whisper-small-shona
32
 
33
- This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the fleurs dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.1298
36
- - Wer: 50.0422
37
 
38
  ## Model description
39
 
 
2
  license: apache-2.0
3
  base_model: openai/whisper-small
4
  tags:
5
+ - whisper-event
6
  - generated_from_trainer
7
  datasets:
8
+ - google/fleurs
9
  metrics:
10
  - wer
11
  model-index:
12
+ - name: Whisper small shona
13
  results:
14
  - task:
15
  name: Automatic Speech Recognition
16
  type: automatic-speech-recognition
17
  dataset:
18
+ name: google/fleurs sn_zw
19
+ type: google/fleurs
20
  config: sn_zw
21
  split: test
22
  args: sn_zw
23
  metrics:
24
  - name: Wer
25
  type: wer
26
+ value: 49.90958408679928
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
  should probably proofread and complete it, then remove this comment. -->
31
 
32
+ # Whisper small shona
33
 
34
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the google/fleurs sn_zw dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 1.1220
37
+ - Wer: 49.9096
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 40.82,
3
- "eval_loss": 0.965289831161499,
4
- "eval_runtime": 148.1672,
5
- "eval_samples_per_second": 6.243,
6
- "eval_steps_per_second": 0.391,
7
- "eval_wer": 45.9,
8
- "train_loss": 0.18361923832818866,
9
- "train_runtime": 4653.6924,
10
- "train_samples_per_second": 6.876,
11
- "train_steps_per_second": 0.43
12
  }
 
1
  {
2
+ "epoch": 121.21,
3
+ "eval_loss": 1.1220260858535767,
4
+ "eval_runtime": 56.2101,
5
+ "eval_samples_per_second": 16.456,
6
+ "eval_steps_per_second": 0.356,
7
+ "eval_wer": 49.90958408679928,
8
+ "train_loss": 0.143078886593692,
9
+ "train_runtime": 4289.2081,
10
+ "train_samples_per_second": 22.382,
11
+ "train_steps_per_second": 0.466
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 40.82,
3
- "eval_loss": 0.965289831161499,
4
- "eval_runtime": 148.1672,
5
- "eval_samples_per_second": 6.243,
6
- "eval_steps_per_second": 0.391,
7
- "eval_wer": 45.9
8
  }
 
1
  {
2
+ "epoch": 121.21,
3
+ "eval_loss": 1.1220260858535767,
4
+ "eval_runtime": 56.2101,
5
+ "eval_samples_per_second": 16.456,
6
+ "eval_steps_per_second": 0.356,
7
+ "eval_wer": 49.90958408679928
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 40.82,
3
- "train_loss": 0.18361923832818866,
4
- "train_runtime": 4653.6924,
5
- "train_samples_per_second": 6.876,
6
- "train_steps_per_second": 0.43
7
  }
 
1
  {
2
+ "epoch": 121.21,
3
+ "train_loss": 0.143078886593692,
4
+ "train_runtime": 4289.2081,
5
+ "train_samples_per_second": 22.382,
6
+ "train_steps_per_second": 0.466
7
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 45.9,
3
  "best_model_checkpoint": "./whisper-small-shona/checkpoint-1600",
4
- "epoch": 40.816326530612244,
5
  "eval_steps": 400,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
@@ -9,546 +9,546 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.51,
13
  "learning_rate": 4.800000000000001e-07,
14
- "loss": 2.8886,
15
  "step": 25
16
  },
17
  {
18
- "epoch": 1.02,
19
- "learning_rate": 9.800000000000001e-07,
20
- "loss": 2.4034,
21
  "step": 50
22
  },
23
  {
24
- "epoch": 1.53,
25
- "learning_rate": 1.48e-06,
26
- "loss": 1.8889,
27
  "step": 75
28
  },
29
  {
30
- "epoch": 2.04,
31
- "learning_rate": 1.98e-06,
32
- "loss": 1.4574,
33
  "step": 100
34
  },
35
  {
36
- "epoch": 2.55,
37
- "learning_rate": 2.4800000000000004e-06,
38
- "loss": 1.1526,
39
  "step": 125
40
  },
41
  {
42
- "epoch": 3.06,
43
- "learning_rate": 2.9800000000000003e-06,
44
- "loss": 0.9975,
45
  "step": 150
46
  },
47
  {
48
- "epoch": 3.57,
49
- "learning_rate": 3.48e-06,
50
- "loss": 0.797,
51
  "step": 175
52
  },
53
  {
54
- "epoch": 4.08,
55
- "learning_rate": 3.980000000000001e-06,
56
- "loss": 0.6751,
57
  "step": 200
58
  },
59
  {
60
- "epoch": 4.59,
61
- "learning_rate": 4.48e-06,
62
- "loss": 0.5246,
63
  "step": 225
64
  },
65
  {
66
- "epoch": 5.1,
67
- "learning_rate": 4.980000000000001e-06,
68
- "loss": 0.4666,
69
  "step": 250
70
  },
71
  {
72
- "epoch": 5.61,
73
- "learning_rate": 5.480000000000001e-06,
74
- "loss": 0.3392,
75
  "step": 275
76
  },
77
  {
78
- "epoch": 6.12,
79
- "learning_rate": 5.98e-06,
80
- "loss": 0.2899,
81
  "step": 300
82
  },
83
  {
84
- "epoch": 6.63,
85
- "learning_rate": 6.480000000000001e-06,
86
- "loss": 0.1993,
87
  "step": 325
88
  },
89
  {
90
- "epoch": 7.14,
91
- "learning_rate": 6.98e-06,
92
- "loss": 0.1636,
93
  "step": 350
94
  },
95
  {
96
- "epoch": 7.65,
97
- "learning_rate": 7.48e-06,
98
- "loss": 0.1039,
99
  "step": 375
100
  },
101
  {
102
- "epoch": 8.16,
103
- "learning_rate": 7.980000000000002e-06,
104
- "loss": 0.09,
105
  "step": 400
106
  },
107
  {
108
- "epoch": 8.16,
109
- "eval_loss": 0.857236921787262,
110
- "eval_runtime": 149.0293,
111
- "eval_samples_per_second": 6.207,
112
- "eval_steps_per_second": 0.389,
113
- "eval_wer": 50.6875,
114
  "step": 400
115
  },
116
  {
117
- "epoch": 8.67,
118
- "learning_rate": 8.48e-06,
119
- "loss": 0.0536,
120
  "step": 425
121
  },
122
  {
123
- "epoch": 9.18,
124
- "learning_rate": 8.98e-06,
125
- "loss": 0.0403,
126
  "step": 450
127
  },
128
  {
129
- "epoch": 9.69,
130
- "learning_rate": 9.48e-06,
131
- "loss": 0.0278,
132
  "step": 475
133
  },
134
  {
135
- "epoch": 10.2,
136
- "learning_rate": 9.980000000000001e-06,
137
- "loss": 0.0204,
138
  "step": 500
139
  },
140
  {
141
- "epoch": 10.71,
142
- "learning_rate": 9.84e-06,
143
- "loss": 0.0131,
144
  "step": 525
145
  },
146
  {
147
- "epoch": 11.22,
148
- "learning_rate": 9.673333333333334e-06,
149
- "loss": 0.0117,
150
  "step": 550
151
  },
152
  {
153
- "epoch": 11.73,
154
- "learning_rate": 9.506666666666667e-06,
155
- "loss": 0.0083,
156
  "step": 575
157
  },
158
  {
159
- "epoch": 12.24,
160
- "learning_rate": 9.340000000000002e-06,
161
- "loss": 0.0069,
162
  "step": 600
163
  },
164
  {
165
- "epoch": 12.76,
166
- "learning_rate": 9.173333333333334e-06,
167
- "loss": 0.0072,
168
  "step": 625
169
  },
170
  {
171
- "epoch": 13.27,
172
- "learning_rate": 9.006666666666666e-06,
173
- "loss": 0.006,
174
  "step": 650
175
  },
176
  {
177
- "epoch": 13.78,
178
- "learning_rate": 8.84e-06,
179
- "loss": 0.0047,
180
  "step": 675
181
  },
182
  {
183
- "epoch": 14.29,
184
- "learning_rate": 8.673333333333334e-06,
185
- "loss": 0.0034,
186
  "step": 700
187
  },
188
  {
189
- "epoch": 14.8,
190
- "learning_rate": 8.506666666666668e-06,
191
- "loss": 0.0029,
192
  "step": 725
193
  },
194
  {
195
- "epoch": 15.31,
196
- "learning_rate": 8.34e-06,
197
- "loss": 0.003,
198
  "step": 750
199
  },
200
  {
201
- "epoch": 15.82,
202
- "learning_rate": 8.173333333333334e-06,
203
- "loss": 0.0021,
204
  "step": 775
205
  },
206
  {
207
- "epoch": 16.33,
208
- "learning_rate": 8.006666666666667e-06,
209
- "loss": 0.0019,
210
  "step": 800
211
  },
212
  {
213
- "epoch": 16.33,
214
- "eval_loss": 0.9069271087646484,
215
- "eval_runtime": 147.3435,
216
- "eval_samples_per_second": 6.278,
217
- "eval_steps_per_second": 0.394,
218
- "eval_wer": 46.44375,
219
  "step": 800
220
  },
221
  {
222
- "epoch": 16.84,
223
- "learning_rate": 7.840000000000001e-06,
224
- "loss": 0.0019,
225
  "step": 825
226
  },
227
  {
228
- "epoch": 17.35,
229
- "learning_rate": 7.673333333333333e-06,
230
- "loss": 0.0019,
231
  "step": 850
232
  },
233
  {
234
- "epoch": 17.86,
235
- "learning_rate": 7.506666666666668e-06,
236
- "loss": 0.0016,
237
  "step": 875
238
  },
239
  {
240
- "epoch": 18.37,
241
- "learning_rate": 7.340000000000001e-06,
242
- "loss": 0.0018,
243
  "step": 900
244
  },
245
  {
246
- "epoch": 18.88,
247
- "learning_rate": 7.173333333333335e-06,
248
- "loss": 0.0014,
249
  "step": 925
250
  },
251
  {
252
- "epoch": 19.39,
253
- "learning_rate": 7.006666666666667e-06,
254
- "loss": 0.0012,
255
  "step": 950
256
  },
257
  {
258
- "epoch": 19.9,
259
- "learning_rate": 6.8400000000000014e-06,
260
- "loss": 0.0013,
261
  "step": 975
262
  },
263
  {
264
- "epoch": 20.41,
265
- "learning_rate": 6.6733333333333335e-06,
266
- "loss": 0.0012,
267
  "step": 1000
268
  },
269
  {
270
- "epoch": 20.92,
271
- "learning_rate": 6.5066666666666665e-06,
272
- "loss": 0.0011,
273
  "step": 1025
274
  },
275
  {
276
- "epoch": 21.43,
277
- "learning_rate": 6.34e-06,
278
- "loss": 0.001,
279
  "step": 1050
280
  },
281
  {
282
- "epoch": 21.94,
283
- "learning_rate": 6.173333333333333e-06,
284
- "loss": 0.001,
285
  "step": 1075
286
  },
287
  {
288
- "epoch": 22.45,
289
- "learning_rate": 6.006666666666667e-06,
290
- "loss": 0.0009,
291
  "step": 1100
292
  },
293
  {
294
- "epoch": 22.96,
295
- "learning_rate": 5.84e-06,
296
- "loss": 0.0009,
297
  "step": 1125
298
  },
299
  {
300
- "epoch": 23.47,
301
- "learning_rate": 5.673333333333334e-06,
302
- "loss": 0.0009,
303
  "step": 1150
304
  },
305
  {
306
- "epoch": 23.98,
307
- "learning_rate": 5.506666666666667e-06,
308
- "loss": 0.0008,
309
  "step": 1175
310
  },
311
  {
312
- "epoch": 24.49,
313
- "learning_rate": 5.3400000000000005e-06,
314
- "loss": 0.0008,
315
  "step": 1200
316
  },
317
  {
318
- "epoch": 24.49,
319
- "eval_loss": 0.9473171830177307,
320
- "eval_runtime": 148.8306,
321
- "eval_samples_per_second": 6.215,
322
- "eval_steps_per_second": 0.39,
323
- "eval_wer": 46.04375,
324
  "step": 1200
325
  },
326
  {
327
- "epoch": 25.0,
328
- "learning_rate": 5.1733333333333335e-06,
329
- "loss": 0.0008,
330
  "step": 1225
331
  },
332
  {
333
- "epoch": 25.51,
334
- "learning_rate": 5.006666666666667e-06,
335
- "loss": 0.0008,
336
  "step": 1250
337
  },
338
  {
339
- "epoch": 26.02,
340
- "learning_rate": 4.84e-06,
341
- "loss": 0.0007,
342
  "step": 1275
343
  },
344
  {
345
- "epoch": 26.53,
346
- "learning_rate": 4.673333333333333e-06,
347
- "loss": 0.0007,
348
  "step": 1300
349
  },
350
  {
351
- "epoch": 27.04,
352
- "learning_rate": 4.506666666666667e-06,
353
- "loss": 0.0007,
354
  "step": 1325
355
  },
356
  {
357
- "epoch": 27.55,
358
- "learning_rate": 4.34e-06,
359
- "loss": 0.0007,
360
  "step": 1350
361
  },
362
  {
363
- "epoch": 28.06,
364
- "learning_rate": 4.173333333333334e-06,
365
- "loss": 0.0007,
366
  "step": 1375
367
  },
368
  {
369
- "epoch": 28.57,
370
- "learning_rate": 4.006666666666667e-06,
371
- "loss": 0.0006,
372
  "step": 1400
373
  },
374
  {
375
- "epoch": 29.08,
376
- "learning_rate": 3.8400000000000005e-06,
377
- "loss": 0.0007,
378
  "step": 1425
379
  },
380
  {
381
- "epoch": 29.59,
382
- "learning_rate": 3.673333333333334e-06,
383
- "loss": 0.0006,
384
  "step": 1450
385
  },
386
  {
387
- "epoch": 30.1,
388
- "learning_rate": 3.5066666666666673e-06,
389
- "loss": 0.0006,
390
  "step": 1475
391
  },
392
  {
393
- "epoch": 30.61,
394
- "learning_rate": 3.3400000000000006e-06,
395
- "loss": 0.0006,
396
  "step": 1500
397
  },
398
  {
399
- "epoch": 31.12,
400
- "learning_rate": 3.173333333333334e-06,
401
- "loss": 0.0006,
402
  "step": 1525
403
  },
404
  {
405
- "epoch": 31.63,
406
- "learning_rate": 3.0066666666666674e-06,
407
- "loss": 0.0006,
408
  "step": 1550
409
  },
410
  {
411
- "epoch": 32.14,
412
- "learning_rate": 2.84e-06,
413
- "loss": 0.0006,
414
  "step": 1575
415
  },
416
  {
417
- "epoch": 32.65,
418
- "learning_rate": 2.6733333333333333e-06,
419
- "loss": 0.0006,
420
  "step": 1600
421
  },
422
  {
423
- "epoch": 32.65,
424
- "eval_loss": 0.965289831161499,
425
- "eval_runtime": 147.3092,
426
- "eval_samples_per_second": 6.279,
427
- "eval_steps_per_second": 0.394,
428
- "eval_wer": 45.9,
429
  "step": 1600
430
  },
431
  {
432
- "epoch": 33.16,
433
- "learning_rate": 2.5066666666666667e-06,
434
- "loss": 0.0006,
435
  "step": 1625
436
  },
437
  {
438
- "epoch": 33.67,
439
- "learning_rate": 2.3400000000000005e-06,
440
- "loss": 0.0005,
441
  "step": 1650
442
  },
443
  {
444
- "epoch": 34.18,
445
- "learning_rate": 2.1733333333333334e-06,
446
- "loss": 0.0006,
447
  "step": 1675
448
  },
449
  {
450
- "epoch": 34.69,
451
- "learning_rate": 2.006666666666667e-06,
452
- "loss": 0.0005,
453
  "step": 1700
454
  },
455
  {
456
- "epoch": 35.2,
457
- "learning_rate": 1.8400000000000002e-06,
458
- "loss": 0.0006,
459
  "step": 1725
460
  },
461
  {
462
- "epoch": 35.71,
463
- "learning_rate": 1.6733333333333335e-06,
464
- "loss": 0.0005,
465
  "step": 1750
466
  },
467
  {
468
- "epoch": 36.22,
469
- "learning_rate": 1.506666666666667e-06,
470
- "loss": 0.0005,
471
  "step": 1775
472
  },
473
  {
474
- "epoch": 36.73,
475
- "learning_rate": 1.34e-06,
476
- "loss": 0.0005,
477
  "step": 1800
478
  },
479
  {
480
- "epoch": 37.24,
481
- "learning_rate": 1.1733333333333335e-06,
482
- "loss": 0.0005,
483
  "step": 1825
484
  },
485
  {
486
- "epoch": 37.76,
487
- "learning_rate": 1.0066666666666668e-06,
488
- "loss": 0.0005,
489
  "step": 1850
490
  },
491
  {
492
- "epoch": 38.27,
493
- "learning_rate": 8.400000000000001e-07,
494
- "loss": 0.0005,
495
  "step": 1875
496
  },
497
  {
498
- "epoch": 38.78,
499
- "learning_rate": 6.733333333333334e-07,
500
- "loss": 0.0005,
501
  "step": 1900
502
  },
503
  {
504
- "epoch": 39.29,
505
- "learning_rate": 5.066666666666667e-07,
506
- "loss": 0.0005,
507
  "step": 1925
508
  },
509
  {
510
- "epoch": 39.8,
511
- "learning_rate": 3.4000000000000003e-07,
512
- "loss": 0.0005,
513
  "step": 1950
514
  },
515
  {
516
- "epoch": 40.31,
517
- "learning_rate": 1.7333333333333335e-07,
518
- "loss": 0.0005,
519
  "step": 1975
520
  },
521
  {
522
- "epoch": 40.82,
523
- "learning_rate": 6.666666666666667e-09,
524
- "loss": 0.0005,
525
  "step": 2000
526
  },
527
  {
528
- "epoch": 40.82,
529
- "eval_loss": 0.9719672799110413,
530
- "eval_runtime": 147.2827,
531
- "eval_samples_per_second": 6.28,
532
- "eval_steps_per_second": 0.394,
533
- "eval_wer": 45.94375,
534
  "step": 2000
535
  },
536
  {
537
- "epoch": 40.82,
538
  "step": 2000,
539
- "total_flos": 9.188559230976655e+18,
540
- "train_loss": 0.18361923832818866,
541
- "train_runtime": 4653.6924,
542
- "train_samples_per_second": 6.876,
543
- "train_steps_per_second": 0.43
544
  }
545
  ],
546
  "logging_steps": 25,
547
  "max_steps": 2000,
548
  "num_input_tokens_seen": 0,
549
- "num_train_epochs": 41,
550
  "save_steps": 400,
551
- "total_flos": 9.188559230976655e+18,
552
  "train_batch_size": 8,
553
  "trial_name": null,
554
  "trial_params": null
 
1
  {
2
+ "best_metric": 49.90958408679928,
3
  "best_model_checkpoint": "./whisper-small-shona/checkpoint-1600",
4
+ "epoch": 121.21212121212122,
5
  "eval_steps": 400,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.52,
13
  "learning_rate": 4.800000000000001e-07,
14
+ "loss": 2.8865,
15
  "step": 25
16
  },
17
  {
18
+ "epoch": 3.03,
19
+ "learning_rate": 9.600000000000001e-07,
20
+ "loss": 2.3463,
21
  "step": 50
22
  },
23
  {
24
+ "epoch": 4.55,
25
+ "learning_rate": 1.46e-06,
26
+ "loss": 1.7622,
27
  "step": 75
28
  },
29
  {
30
+ "epoch": 6.06,
31
+ "learning_rate": 1.9600000000000003e-06,
32
+ "loss": 1.2611,
33
  "step": 100
34
  },
35
  {
36
+ "epoch": 7.58,
37
+ "learning_rate": 2.46e-06,
38
+ "loss": 0.9353,
39
  "step": 125
40
  },
41
  {
42
+ "epoch": 9.09,
43
+ "learning_rate": 2.96e-06,
44
+ "loss": 0.7091,
45
  "step": 150
46
  },
47
  {
48
+ "epoch": 10.61,
49
+ "learning_rate": 3.46e-06,
50
+ "loss": 0.5145,
51
  "step": 175
52
  },
53
  {
54
+ "epoch": 12.12,
55
+ "learning_rate": 3.96e-06,
56
+ "loss": 0.3821,
57
  "step": 200
58
  },
59
  {
60
+ "epoch": 13.64,
61
+ "learning_rate": 4.4600000000000005e-06,
62
+ "loss": 0.2477,
63
  "step": 225
64
  },
65
  {
66
+ "epoch": 15.15,
67
+ "learning_rate": 4.960000000000001e-06,
68
+ "loss": 0.1587,
69
  "step": 250
70
  },
71
  {
72
+ "epoch": 16.67,
73
+ "learning_rate": 5.460000000000001e-06,
74
+ "loss": 0.0896,
75
  "step": 275
76
  },
77
  {
78
+ "epoch": 18.18,
79
+ "learning_rate": 5.9600000000000005e-06,
80
+ "loss": 0.0466,
81
  "step": 300
82
  },
83
  {
84
+ "epoch": 19.7,
85
+ "learning_rate": 6.460000000000001e-06,
86
+ "loss": 0.0224,
87
  "step": 325
88
  },
89
  {
90
+ "epoch": 21.21,
91
+ "learning_rate": 6.96e-06,
92
+ "loss": 0.0132,
93
  "step": 350
94
  },
95
  {
96
+ "epoch": 22.73,
97
+ "learning_rate": 7.4600000000000006e-06,
98
+ "loss": 0.0084,
99
  "step": 375
100
  },
101
  {
102
+ "epoch": 24.24,
103
+ "learning_rate": 7.960000000000002e-06,
104
+ "loss": 0.0064,
105
  "step": 400
106
  },
107
  {
108
+ "epoch": 24.24,
109
+ "eval_loss": 0.9630287289619446,
110
+ "eval_runtime": 56.9771,
111
+ "eval_samples_per_second": 16.235,
112
+ "eval_steps_per_second": 0.351,
113
+ "eval_wer": 50.72332730560579,
114
  "step": 400
115
  },
116
  {
117
+ "epoch": 25.76,
118
+ "learning_rate": 8.46e-06,
119
+ "loss": 0.0049,
120
  "step": 425
121
  },
122
  {
123
+ "epoch": 27.27,
124
+ "learning_rate": 8.96e-06,
125
+ "loss": 0.0041,
126
  "step": 450
127
  },
128
  {
129
+ "epoch": 28.79,
130
+ "learning_rate": 9.460000000000001e-06,
131
+ "loss": 0.0034,
132
  "step": 475
133
  },
134
  {
135
+ "epoch": 30.3,
136
+ "learning_rate": 9.960000000000001e-06,
137
+ "loss": 0.0029,
138
  "step": 500
139
  },
140
  {
141
+ "epoch": 31.82,
142
+ "learning_rate": 9.846666666666668e-06,
143
+ "loss": 0.0025,
144
  "step": 525
145
  },
146
  {
147
+ "epoch": 33.33,
148
+ "learning_rate": 9.68e-06,
149
+ "loss": 0.0022,
150
  "step": 550
151
  },
152
  {
153
+ "epoch": 34.85,
154
+ "learning_rate": 9.513333333333334e-06,
155
+ "loss": 0.0019,
156
  "step": 575
157
  },
158
  {
159
+ "epoch": 36.36,
160
+ "learning_rate": 9.346666666666666e-06,
161
+ "loss": 0.0017,
162
  "step": 600
163
  },
164
  {
165
+ "epoch": 37.88,
166
+ "learning_rate": 9.180000000000002e-06,
167
+ "loss": 0.0016,
168
  "step": 625
169
  },
170
  {
171
+ "epoch": 39.39,
172
+ "learning_rate": 9.013333333333334e-06,
173
+ "loss": 0.0014,
174
  "step": 650
175
  },
176
  {
177
+ "epoch": 40.91,
178
+ "learning_rate": 8.846666666666668e-06,
179
+ "loss": 0.0013,
180
  "step": 675
181
  },
182
  {
183
+ "epoch": 42.42,
184
+ "learning_rate": 8.68e-06,
185
+ "loss": 0.0012,
186
  "step": 700
187
  },
188
  {
189
+ "epoch": 43.94,
190
+ "learning_rate": 8.513333333333335e-06,
191
+ "loss": 0.0012,
192
  "step": 725
193
  },
194
  {
195
+ "epoch": 45.45,
196
+ "learning_rate": 8.346666666666668e-06,
197
+ "loss": 0.0011,
198
  "step": 750
199
  },
200
  {
201
+ "epoch": 46.97,
202
+ "learning_rate": 8.18e-06,
203
+ "loss": 0.001,
204
  "step": 775
205
  },
206
  {
207
+ "epoch": 48.48,
208
+ "learning_rate": 8.013333333333333e-06,
209
+ "loss": 0.001,
210
  "step": 800
211
  },
212
  {
213
+ "epoch": 48.48,
214
+ "eval_loss": 1.0617018938064575,
215
+ "eval_runtime": 56.831,
216
+ "eval_samples_per_second": 16.276,
217
+ "eval_steps_per_second": 0.352,
218
+ "eval_wer": 49.93972272453286,
219
  "step": 800
220
  },
221
  {
222
+ "epoch": 50.0,
223
+ "learning_rate": 7.846666666666667e-06,
224
+ "loss": 0.0009,
225
  "step": 825
226
  },
227
  {
228
+ "epoch": 51.52,
229
+ "learning_rate": 7.680000000000001e-06,
230
+ "loss": 0.0009,
231
  "step": 850
232
  },
233
  {
234
+ "epoch": 53.03,
235
+ "learning_rate": 7.513333333333334e-06,
236
+ "loss": 0.0008,
237
  "step": 875
238
  },
239
  {
240
+ "epoch": 54.55,
241
+ "learning_rate": 7.346666666666668e-06,
242
+ "loss": 0.0008,
243
  "step": 900
244
  },
245
  {
246
+ "epoch": 56.06,
247
+ "learning_rate": 7.180000000000001e-06,
248
+ "loss": 0.0007,
249
  "step": 925
250
  },
251
  {
252
+ "epoch": 57.58,
253
+ "learning_rate": 7.0133333333333345e-06,
254
+ "loss": 0.0007,
255
  "step": 950
256
  },
257
  {
258
+ "epoch": 59.09,
259
+ "learning_rate": 6.846666666666667e-06,
260
+ "loss": 0.0007,
261
  "step": 975
262
  },
263
  {
264
+ "epoch": 60.61,
265
+ "learning_rate": 6.680000000000001e-06,
266
+ "loss": 0.0007,
267
  "step": 1000
268
  },
269
  {
270
+ "epoch": 62.12,
271
+ "learning_rate": 6.513333333333333e-06,
272
+ "loss": 0.0006,
273
  "step": 1025
274
  },
275
  {
276
+ "epoch": 63.64,
277
+ "learning_rate": 6.346666666666668e-06,
278
+ "loss": 0.0006,
279
  "step": 1050
280
  },
281
  {
282
+ "epoch": 65.15,
283
+ "learning_rate": 6.18e-06,
284
+ "loss": 0.0006,
285
  "step": 1075
286
  },
287
  {
288
+ "epoch": 66.67,
289
+ "learning_rate": 6.013333333333335e-06,
290
+ "loss": 0.0006,
291
  "step": 1100
292
  },
293
  {
294
+ "epoch": 68.18,
295
+ "learning_rate": 5.846666666666667e-06,
296
+ "loss": 0.0005,
297
  "step": 1125
298
  },
299
  {
300
+ "epoch": 69.7,
301
+ "learning_rate": 5.68e-06,
302
+ "loss": 0.0005,
303
  "step": 1150
304
  },
305
  {
306
+ "epoch": 71.21,
307
+ "learning_rate": 5.513333333333334e-06,
308
+ "loss": 0.0005,
309
  "step": 1175
310
  },
311
  {
312
+ "epoch": 72.73,
313
+ "learning_rate": 5.346666666666667e-06,
314
+ "loss": 0.0005,
315
  "step": 1200
316
  },
317
  {
318
+ "epoch": 72.73,
319
+ "eval_loss": 1.101595401763916,
320
+ "eval_runtime": 56.6617,
321
+ "eval_samples_per_second": 16.325,
322
+ "eval_steps_per_second": 0.353,
323
+ "eval_wer": 49.93972272453286,
324
  "step": 1200
325
  },
326
  {
327
+ "epoch": 74.24,
328
+ "learning_rate": 5.18e-06,
329
+ "loss": 0.0005,
330
  "step": 1225
331
  },
332
  {
333
+ "epoch": 75.76,
334
+ "learning_rate": 5.013333333333333e-06,
335
+ "loss": 0.0005,
336
  "step": 1250
337
  },
338
  {
339
+ "epoch": 77.27,
340
+ "learning_rate": 4.846666666666667e-06,
341
+ "loss": 0.0005,
342
  "step": 1275
343
  },
344
  {
345
+ "epoch": 78.79,
346
+ "learning_rate": 4.680000000000001e-06,
347
+ "loss": 0.0005,
348
  "step": 1300
349
  },
350
  {
351
+ "epoch": 80.3,
352
+ "learning_rate": 4.513333333333333e-06,
353
+ "loss": 0.0004,
354
  "step": 1325
355
  },
356
  {
357
+ "epoch": 81.82,
358
+ "learning_rate": 4.346666666666667e-06,
359
+ "loss": 0.0004,
360
  "step": 1350
361
  },
362
  {
363
+ "epoch": 83.33,
364
+ "learning_rate": 4.18e-06,
365
+ "loss": 0.0004,
366
  "step": 1375
367
  },
368
  {
369
+ "epoch": 84.85,
370
+ "learning_rate": 4.013333333333334e-06,
371
+ "loss": 0.0004,
372
  "step": 1400
373
  },
374
  {
375
+ "epoch": 86.36,
376
+ "learning_rate": 3.8466666666666665e-06,
377
+ "loss": 0.0004,
378
  "step": 1425
379
  },
380
  {
381
+ "epoch": 87.88,
382
+ "learning_rate": 3.6800000000000003e-06,
383
+ "loss": 0.0004,
384
  "step": 1450
385
  },
386
  {
387
+ "epoch": 89.39,
388
+ "learning_rate": 3.5133333333333337e-06,
389
+ "loss": 0.0004,
390
  "step": 1475
391
  },
392
  {
393
+ "epoch": 90.91,
394
+ "learning_rate": 3.346666666666667e-06,
395
+ "loss": 0.0004,
396
  "step": 1500
397
  },
398
  {
399
+ "epoch": 92.42,
400
+ "learning_rate": 3.1800000000000005e-06,
401
+ "loss": 0.0004,
402
  "step": 1525
403
  },
404
  {
405
+ "epoch": 93.94,
406
+ "learning_rate": 3.013333333333334e-06,
407
+ "loss": 0.0004,
408
  "step": 1550
409
  },
410
  {
411
+ "epoch": 95.45,
412
+ "learning_rate": 2.8466666666666672e-06,
413
+ "loss": 0.0004,
414
  "step": 1575
415
  },
416
  {
417
+ "epoch": 96.97,
418
+ "learning_rate": 2.68e-06,
419
+ "loss": 0.0004,
420
  "step": 1600
421
  },
422
  {
423
+ "epoch": 96.97,
424
+ "eval_loss": 1.1220260858535767,
425
+ "eval_runtime": 56.5264,
426
+ "eval_samples_per_second": 16.364,
427
+ "eval_steps_per_second": 0.354,
428
+ "eval_wer": 49.90958408679928,
429
  "step": 1600
430
  },
431
  {
432
+ "epoch": 98.48,
433
+ "learning_rate": 2.5133333333333336e-06,
434
+ "loss": 0.0004,
435
  "step": 1625
436
  },
437
  {
438
+ "epoch": 100.0,
439
+ "learning_rate": 2.346666666666667e-06,
440
+ "loss": 0.0004,
441
  "step": 1650
442
  },
443
  {
444
+ "epoch": 101.52,
445
+ "learning_rate": 2.1800000000000003e-06,
446
+ "loss": 0.0004,
447
  "step": 1675
448
  },
449
  {
450
+ "epoch": 103.03,
451
+ "learning_rate": 2.0133333333333337e-06,
452
+ "loss": 0.0004,
453
  "step": 1700
454
  },
455
  {
456
+ "epoch": 104.55,
457
+ "learning_rate": 1.8466666666666668e-06,
458
+ "loss": 0.0004,
459
  "step": 1725
460
  },
461
  {
462
+ "epoch": 106.06,
463
+ "learning_rate": 1.6800000000000002e-06,
464
+ "loss": 0.0003,
465
  "step": 1750
466
  },
467
  {
468
+ "epoch": 107.58,
469
+ "learning_rate": 1.5133333333333334e-06,
470
+ "loss": 0.0003,
471
  "step": 1775
472
  },
473
  {
474
+ "epoch": 109.09,
475
+ "learning_rate": 1.3466666666666668e-06,
476
+ "loss": 0.0003,
477
  "step": 1800
478
  },
479
  {
480
+ "epoch": 110.61,
481
+ "learning_rate": 1.1800000000000001e-06,
482
+ "loss": 0.0003,
483
  "step": 1825
484
  },
485
  {
486
+ "epoch": 112.12,
487
+ "learning_rate": 1.0133333333333333e-06,
488
+ "loss": 0.0003,
489
  "step": 1850
490
  },
491
  {
492
+ "epoch": 113.64,
493
+ "learning_rate": 8.466666666666668e-07,
494
+ "loss": 0.0003,
495
  "step": 1875
496
  },
497
  {
498
+ "epoch": 115.15,
499
+ "learning_rate": 6.800000000000001e-07,
500
+ "loss": 0.0003,
501
  "step": 1900
502
  },
503
  {
504
+ "epoch": 116.67,
505
+ "learning_rate": 5.133333333333334e-07,
506
+ "loss": 0.0003,
507
  "step": 1925
508
  },
509
  {
510
+ "epoch": 118.18,
511
+ "learning_rate": 3.466666666666667e-07,
512
+ "loss": 0.0003,
513
  "step": 1950
514
  },
515
  {
516
+ "epoch": 119.7,
517
+ "learning_rate": 1.8e-07,
518
+ "loss": 0.0003,
519
  "step": 1975
520
  },
521
  {
522
+ "epoch": 121.21,
523
+ "learning_rate": 1.3333333333333334e-08,
524
+ "loss": 0.0003,
525
  "step": 2000
526
  },
527
  {
528
+ "epoch": 121.21,
529
+ "eval_loss": 1.1297551393508911,
530
+ "eval_runtime": 56.6433,
531
+ "eval_samples_per_second": 16.33,
532
+ "eval_steps_per_second": 0.353,
533
+ "eval_wer": 50.04219409282701,
534
  "step": 2000
535
  },
536
  {
537
+ "epoch": 121.21,
538
  "step": 2000,
539
+ "total_flos": 2.770419932175794e+19,
540
+ "train_loss": 0.143078886593692,
541
+ "train_runtime": 4289.2081,
542
+ "train_samples_per_second": 22.382,
543
+ "train_steps_per_second": 0.466
544
  }
545
  ],
546
  "logging_steps": 25,
547
  "max_steps": 2000,
548
  "num_input_tokens_seen": 0,
549
+ "num_train_epochs": 125,
550
  "save_steps": 400,
551
+ "total_flos": 2.770419932175794e+19,
552
  "train_batch_size": 8,
553
  "trial_name": null,
554
  "trial_params": null