steja commited on
Commit
47739f0
1 Parent(s): f883800

End of training

Browse files
Files changed (5) hide show
  1. README.md +10 -9
  2. all_results.json +10 -10
  3. eval_results.json +6 -6
  4. train_results.json +5 -5
  5. trainer_state.json +285 -280
README.md CHANGED
@@ -2,38 +2,39 @@
2
  license: apache-2.0
3
  base_model: openai/whisper-small
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - fleurs
8
  metrics:
9
  - wer
10
  model-index:
11
- - name: whisper-small-shona
12
  results:
13
  - task:
14
  name: Automatic Speech Recognition
15
  type: automatic-speech-recognition
16
  dataset:
17
- name: fleurs
18
- type: fleurs
19
  config: sn_zw
20
  split: test
21
  args: sn_zw
22
  metrics:
23
  - name: Wer
24
  type: wer
25
- value: 45.94375
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
29
  should probably proofread and complete it, then remove this comment. -->
30
 
31
- # whisper-small-shona
32
 
33
- This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the fleurs dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.9720
36
- - Wer: 45.9438
37
 
38
  ## Model description
39
 
 
2
  license: apache-2.0
3
  base_model: openai/whisper-small
4
  tags:
5
+ - whisper-event
6
  - generated_from_trainer
7
  datasets:
8
+ - google/fleurs
9
  metrics:
10
  - wer
11
  model-index:
12
+ - name: Whisper small shona
13
  results:
14
  - task:
15
  name: Automatic Speech Recognition
16
  type: automatic-speech-recognition
17
  dataset:
18
+ name: google/fleurs sn_zw
19
+ type: google/fleurs
20
  config: sn_zw
21
  split: test
22
  args: sn_zw
23
  metrics:
24
  - name: Wer
25
  type: wer
26
+ value: 45.9
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
  should probably proofread and complete it, then remove this comment. -->
31
 
32
+ # Whisper small shona
33
 
34
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the google/fleurs sn_zw dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.9653
37
+ - Wer: 45.9
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 166.64,
3
- "eval_loss": 1.117426872253418,
4
- "eval_runtime": 243.6546,
5
- "eval_samples_per_second": 3.796,
6
- "eval_steps_per_second": 0.062,
7
- "eval_wer": 50.85625,
8
- "train_loss": 0.1423063586682547,
9
- "train_runtime": 4512.4735,
10
- "train_samples_per_second": 28.366,
11
- "train_steps_per_second": 0.443
12
  }
 
1
  {
2
+ "epoch": 40.82,
3
+ "eval_loss": 0.965289831161499,
4
+ "eval_runtime": 148.1672,
5
+ "eval_samples_per_second": 6.243,
6
+ "eval_steps_per_second": 0.391,
7
+ "eval_wer": 45.9,
8
+ "train_loss": 0.18361923832818866,
9
+ "train_runtime": 4653.6924,
10
+ "train_samples_per_second": 6.876,
11
+ "train_steps_per_second": 0.43
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 166.64,
3
- "eval_loss": 1.117426872253418,
4
- "eval_runtime": 243.6546,
5
- "eval_samples_per_second": 3.796,
6
- "eval_steps_per_second": 0.062,
7
- "eval_wer": 50.85625
8
  }
 
1
  {
2
+ "epoch": 40.82,
3
+ "eval_loss": 0.965289831161499,
4
+ "eval_runtime": 148.1672,
5
+ "eval_samples_per_second": 6.243,
6
+ "eval_steps_per_second": 0.391,
7
+ "eval_wer": 45.9
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 166.64,
3
- "train_loss": 0.1423063586682547,
4
- "train_runtime": 4512.4735,
5
- "train_samples_per_second": 28.366,
6
- "train_steps_per_second": 0.443
7
  }
 
1
  {
2
+ "epoch": 40.82,
3
+ "train_loss": 0.18361923832818866,
4
+ "train_runtime": 4653.6924,
5
+ "train_samples_per_second": 6.876,
6
+ "train_steps_per_second": 0.43
7
  }
trainer_state.json CHANGED
@@ -1,550 +1,555 @@
1
  {
2
- "best_metric": 50.85625,
3
- "best_model_checkpoint": "./whisper-small-shona/checkpoint-1200",
4
- "epoch": 166.64,
 
5
  "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 2.08,
12
  "learning_rate": 4.800000000000001e-07,
13
- "loss": 3.009,
14
  "step": 25
15
  },
16
  {
17
- "epoch": 4.16,
18
- "learning_rate": 9.600000000000001e-07,
19
- "loss": 2.4459,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 6.24,
24
- "learning_rate": 1.46e-06,
25
- "loss": 1.8015,
26
  "step": 75
27
  },
28
  {
29
- "epoch": 8.32,
30
- "learning_rate": 1.9600000000000003e-06,
31
- "loss": 1.2656,
32
  "step": 100
33
  },
34
  {
35
- "epoch": 10.4,
36
- "learning_rate": 2.46e-06,
37
- "loss": 0.9172,
38
  "step": 125
39
  },
40
  {
41
- "epoch": 12.48,
42
- "learning_rate": 2.96e-06,
43
- "loss": 0.6728,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 14.56,
48
- "learning_rate": 3.46e-06,
49
- "loss": 0.4792,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 16.64,
54
- "learning_rate": 3.96e-06,
55
- "loss": 0.32,
56
  "step": 200
57
  },
58
  {
59
- "epoch": 18.72,
60
- "learning_rate": 4.4600000000000005e-06,
61
- "loss": 0.1975,
62
  "step": 225
63
  },
64
  {
65
- "epoch": 20.8,
66
- "learning_rate": 4.960000000000001e-06,
67
- "loss": 0.1101,
68
  "step": 250
69
  },
70
  {
71
- "epoch": 22.88,
72
- "learning_rate": 5.460000000000001e-06,
73
- "loss": 0.0537,
74
  "step": 275
75
  },
76
  {
77
- "epoch": 24.96,
78
- "learning_rate": 5.9600000000000005e-06,
79
- "loss": 0.0256,
80
  "step": 300
81
  },
82
  {
83
- "epoch": 27.08,
84
- "learning_rate": 6.460000000000001e-06,
85
- "loss": 0.014,
86
  "step": 325
87
  },
88
  {
89
- "epoch": 29.16,
90
- "learning_rate": 6.96e-06,
91
- "loss": 0.0092,
92
  "step": 350
93
  },
94
  {
95
- "epoch": 31.24,
96
- "learning_rate": 7.4600000000000006e-06,
97
- "loss": 0.007,
98
  "step": 375
99
  },
100
  {
101
- "epoch": 33.32,
102
- "learning_rate": 7.960000000000002e-06,
103
- "loss": 0.0054,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 33.32,
108
- "eval_loss": 0.9825747013092041,
109
- "eval_runtime": 245.236,
110
- "eval_samples_per_second": 3.772,
111
- "eval_steps_per_second": 0.061,
112
- "eval_wer": 51.668749999999996,
113
  "step": 400
114
  },
115
  {
116
- "epoch": 35.4,
117
- "learning_rate": 8.46e-06,
118
- "loss": 0.0043,
119
  "step": 425
120
  },
121
  {
122
- "epoch": 37.48,
123
- "learning_rate": 8.96e-06,
124
- "loss": 0.0036,
125
  "step": 450
126
  },
127
  {
128
- "epoch": 39.56,
129
- "learning_rate": 9.460000000000001e-06,
130
- "loss": 0.003,
131
  "step": 475
132
  },
133
  {
134
- "epoch": 41.64,
135
- "learning_rate": 9.960000000000001e-06,
136
- "loss": 0.0026,
137
  "step": 500
138
  },
139
  {
140
- "epoch": 43.72,
141
- "learning_rate": 9.846666666666668e-06,
142
- "loss": 0.0022,
143
  "step": 525
144
  },
145
  {
146
- "epoch": 45.8,
147
- "learning_rate": 9.68e-06,
148
- "loss": 0.0019,
149
  "step": 550
150
  },
151
  {
152
- "epoch": 47.88,
153
- "learning_rate": 9.513333333333334e-06,
154
- "loss": 0.0017,
155
  "step": 575
156
  },
157
  {
158
- "epoch": 49.96,
159
- "learning_rate": 9.346666666666666e-06,
160
- "loss": 0.0016,
161
  "step": 600
162
  },
163
  {
164
- "epoch": 52.08,
165
- "learning_rate": 9.180000000000002e-06,
166
- "loss": 0.0015,
167
  "step": 625
168
  },
169
  {
170
- "epoch": 54.16,
171
- "learning_rate": 9.013333333333334e-06,
172
- "loss": 0.0013,
173
  "step": 650
174
  },
175
  {
176
- "epoch": 56.24,
177
- "learning_rate": 8.846666666666668e-06,
178
- "loss": 0.0012,
179
  "step": 675
180
  },
181
  {
182
- "epoch": 58.32,
183
- "learning_rate": 8.68e-06,
184
- "loss": 0.0011,
185
  "step": 700
186
  },
187
  {
188
- "epoch": 60.4,
189
- "learning_rate": 8.513333333333335e-06,
190
- "loss": 0.001,
191
  "step": 725
192
  },
193
  {
194
- "epoch": 62.48,
195
- "learning_rate": 8.346666666666668e-06,
196
- "loss": 0.001,
197
  "step": 750
198
  },
199
  {
200
- "epoch": 64.56,
201
- "learning_rate": 8.18e-06,
202
- "loss": 0.0009,
203
  "step": 775
204
  },
205
  {
206
- "epoch": 66.64,
207
- "learning_rate": 8.013333333333333e-06,
208
- "loss": 0.0009,
209
  "step": 800
210
  },
211
  {
212
- "epoch": 66.64,
213
- "eval_loss": 1.0774297714233398,
214
- "eval_runtime": 228.2657,
215
- "eval_samples_per_second": 4.052,
216
- "eval_steps_per_second": 0.066,
217
- "eval_wer": 50.90625,
218
  "step": 800
219
  },
220
  {
221
- "epoch": 68.72,
222
- "learning_rate": 7.846666666666667e-06,
223
- "loss": 0.0008,
224
  "step": 825
225
  },
226
  {
227
- "epoch": 70.8,
228
- "learning_rate": 7.680000000000001e-06,
229
- "loss": 0.0008,
230
  "step": 850
231
  },
232
  {
233
- "epoch": 72.88,
234
- "learning_rate": 7.513333333333334e-06,
235
- "loss": 0.0007,
236
  "step": 875
237
  },
238
  {
239
- "epoch": 74.96,
240
- "learning_rate": 7.346666666666668e-06,
241
- "loss": 0.0007,
242
  "step": 900
243
  },
244
  {
245
- "epoch": 77.08,
246
- "learning_rate": 7.180000000000001e-06,
247
- "loss": 0.0007,
248
  "step": 925
249
  },
250
  {
251
- "epoch": 79.16,
252
- "learning_rate": 7.0133333333333345e-06,
253
- "loss": 0.0007,
254
  "step": 950
255
  },
256
  {
257
- "epoch": 81.24,
258
- "learning_rate": 6.846666666666667e-06,
259
- "loss": 0.0006,
260
  "step": 975
261
  },
262
  {
263
- "epoch": 83.32,
264
- "learning_rate": 6.680000000000001e-06,
265
- "loss": 0.0006,
266
  "step": 1000
267
  },
268
  {
269
- "epoch": 85.4,
270
- "learning_rate": 6.513333333333333e-06,
271
- "loss": 0.0006,
272
  "step": 1025
273
  },
274
  {
275
- "epoch": 87.48,
276
- "learning_rate": 6.346666666666668e-06,
277
- "loss": 0.0006,
278
  "step": 1050
279
  },
280
  {
281
- "epoch": 89.56,
282
- "learning_rate": 6.18e-06,
283
- "loss": 0.0005,
284
  "step": 1075
285
  },
286
  {
287
- "epoch": 91.64,
288
- "learning_rate": 6.013333333333335e-06,
289
- "loss": 0.0005,
290
  "step": 1100
291
  },
292
  {
293
- "epoch": 93.72,
294
- "learning_rate": 5.846666666666667e-06,
295
- "loss": 0.0005,
296
  "step": 1125
297
  },
298
  {
299
- "epoch": 95.8,
300
- "learning_rate": 5.68e-06,
301
- "loss": 0.0005,
302
  "step": 1150
303
  },
304
  {
305
- "epoch": 97.88,
306
- "learning_rate": 5.513333333333334e-06,
307
- "loss": 0.0005,
308
  "step": 1175
309
  },
310
  {
311
- "epoch": 99.96,
312
- "learning_rate": 5.346666666666667e-06,
313
- "loss": 0.0005,
314
  "step": 1200
315
  },
316
  {
317
- "epoch": 99.96,
318
- "eval_loss": 1.117426872253418,
319
- "eval_runtime": 245.3452,
320
- "eval_samples_per_second": 3.77,
321
- "eval_steps_per_second": 0.061,
322
- "eval_wer": 50.85625,
323
  "step": 1200
324
  },
325
  {
326
- "epoch": 102.08,
327
- "learning_rate": 5.18e-06,
328
- "loss": 0.0005,
329
  "step": 1225
330
  },
331
  {
332
- "epoch": 104.16,
333
- "learning_rate": 5.013333333333333e-06,
334
- "loss": 0.0004,
335
  "step": 1250
336
  },
337
  {
338
- "epoch": 106.24,
339
- "learning_rate": 4.846666666666667e-06,
340
- "loss": 0.0004,
341
  "step": 1275
342
  },
343
  {
344
- "epoch": 108.32,
345
- "learning_rate": 4.680000000000001e-06,
346
- "loss": 0.0004,
347
  "step": 1300
348
  },
349
  {
350
- "epoch": 110.4,
351
- "learning_rate": 4.513333333333333e-06,
352
- "loss": 0.0004,
353
  "step": 1325
354
  },
355
  {
356
- "epoch": 112.48,
357
- "learning_rate": 4.346666666666667e-06,
358
- "loss": 0.0004,
359
  "step": 1350
360
  },
361
  {
362
- "epoch": 114.56,
363
- "learning_rate": 4.18e-06,
364
- "loss": 0.0004,
365
  "step": 1375
366
  },
367
  {
368
- "epoch": 116.64,
369
- "learning_rate": 4.013333333333334e-06,
370
- "loss": 0.0004,
371
  "step": 1400
372
  },
373
  {
374
- "epoch": 118.72,
375
- "learning_rate": 3.8466666666666665e-06,
376
- "loss": 0.0004,
377
  "step": 1425
378
  },
379
  {
380
- "epoch": 120.8,
381
- "learning_rate": 3.6800000000000003e-06,
382
- "loss": 0.0004,
383
  "step": 1450
384
  },
385
  {
386
- "epoch": 122.88,
387
- "learning_rate": 3.5133333333333337e-06,
388
- "loss": 0.0004,
389
  "step": 1475
390
  },
391
  {
392
- "epoch": 124.96,
393
- "learning_rate": 3.346666666666667e-06,
394
- "loss": 0.0004,
395
  "step": 1500
396
  },
397
  {
398
- "epoch": 127.08,
399
- "learning_rate": 3.1800000000000005e-06,
400
- "loss": 0.0004,
401
  "step": 1525
402
  },
403
  {
404
- "epoch": 129.16,
405
- "learning_rate": 3.013333333333334e-06,
406
- "loss": 0.0004,
407
  "step": 1550
408
  },
409
  {
410
- "epoch": 131.24,
411
- "learning_rate": 2.8466666666666672e-06,
412
- "loss": 0.0003,
413
  "step": 1575
414
  },
415
  {
416
- "epoch": 133.32,
417
- "learning_rate": 2.68e-06,
418
- "loss": 0.0003,
419
  "step": 1600
420
  },
421
  {
422
- "epoch": 133.32,
423
- "eval_loss": 1.138814926147461,
424
- "eval_runtime": 244.5662,
425
- "eval_samples_per_second": 3.782,
426
- "eval_steps_per_second": 0.061,
427
- "eval_wer": 50.875,
428
  "step": 1600
429
  },
430
  {
431
- "epoch": 135.4,
432
- "learning_rate": 2.5133333333333336e-06,
433
- "loss": 0.0003,
434
  "step": 1625
435
  },
436
  {
437
- "epoch": 137.48,
438
- "learning_rate": 2.346666666666667e-06,
439
- "loss": 0.0003,
440
  "step": 1650
441
  },
442
  {
443
- "epoch": 139.56,
444
- "learning_rate": 2.1800000000000003e-06,
445
- "loss": 0.0003,
446
  "step": 1675
447
  },
448
  {
449
- "epoch": 141.64,
450
- "learning_rate": 2.0133333333333337e-06,
451
- "loss": 0.0003,
452
  "step": 1700
453
  },
454
  {
455
- "epoch": 143.72,
456
- "learning_rate": 1.8466666666666668e-06,
457
- "loss": 0.0003,
458
  "step": 1725
459
  },
460
  {
461
- "epoch": 145.8,
462
- "learning_rate": 1.6800000000000002e-06,
463
- "loss": 0.0003,
464
  "step": 1750
465
  },
466
  {
467
- "epoch": 147.88,
468
- "learning_rate": 1.5133333333333334e-06,
469
- "loss": 0.0003,
470
  "step": 1775
471
  },
472
  {
473
- "epoch": 149.96,
474
- "learning_rate": 1.3466666666666668e-06,
475
- "loss": 0.0003,
476
  "step": 1800
477
  },
478
  {
479
- "epoch": 152.08,
480
- "learning_rate": 1.1800000000000001e-06,
481
- "loss": 0.0003,
482
  "step": 1825
483
  },
484
  {
485
- "epoch": 154.16,
486
- "learning_rate": 1.0133333333333333e-06,
487
- "loss": 0.0003,
488
  "step": 1850
489
  },
490
  {
491
- "epoch": 156.24,
492
- "learning_rate": 8.466666666666668e-07,
493
- "loss": 0.0003,
494
  "step": 1875
495
  },
496
  {
497
- "epoch": 158.32,
498
- "learning_rate": 6.800000000000001e-07,
499
- "loss": 0.0003,
500
  "step": 1900
501
  },
502
  {
503
- "epoch": 160.4,
504
- "learning_rate": 5.133333333333334e-07,
505
- "loss": 0.0003,
506
  "step": 1925
507
  },
508
  {
509
- "epoch": 162.48,
510
- "learning_rate": 3.466666666666667e-07,
511
- "loss": 0.0003,
512
  "step": 1950
513
  },
514
  {
515
- "epoch": 164.56,
516
- "learning_rate": 1.8e-07,
517
- "loss": 0.0003,
518
  "step": 1975
519
  },
520
  {
521
- "epoch": 166.64,
522
- "learning_rate": 1.3333333333333334e-08,
523
- "loss": 0.0003,
524
  "step": 2000
525
  },
526
  {
527
- "epoch": 166.64,
528
- "eval_loss": 1.1461412906646729,
529
- "eval_runtime": 244.933,
530
- "eval_samples_per_second": 3.777,
531
- "eval_steps_per_second": 0.061,
532
- "eval_wer": 50.925,
533
  "step": 2000
534
  },
535
  {
536
- "epoch": 166.64,
537
  "step": 2000,
538
- "total_flos": 3.770541405645465e+19,
539
- "train_loss": 0.1423063586682547,
540
- "train_runtime": 4512.4735,
541
- "train_samples_per_second": 28.366,
542
- "train_steps_per_second": 0.443
543
  }
544
  ],
 
545
  "max_steps": 2000,
546
- "num_train_epochs": 167,
547
- "total_flos": 3.770541405645465e+19,
 
 
 
548
  "trial_name": null,
549
  "trial_params": null
550
  }
 
1
  {
2
+ "best_metric": 45.9,
3
+ "best_model_checkpoint": "./whisper-small-shona/checkpoint-1600",
4
+ "epoch": 40.816326530612244,
5
+ "eval_steps": 400,
6
  "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.51,
13
  "learning_rate": 4.800000000000001e-07,
14
+ "loss": 2.8886,
15
  "step": 25
16
  },
17
  {
18
+ "epoch": 1.02,
19
+ "learning_rate": 9.800000000000001e-07,
20
+ "loss": 2.4034,
21
  "step": 50
22
  },
23
  {
24
+ "epoch": 1.53,
25
+ "learning_rate": 1.48e-06,
26
+ "loss": 1.8889,
27
  "step": 75
28
  },
29
  {
30
+ "epoch": 2.04,
31
+ "learning_rate": 1.98e-06,
32
+ "loss": 1.4574,
33
  "step": 100
34
  },
35
  {
36
+ "epoch": 2.55,
37
+ "learning_rate": 2.4800000000000004e-06,
38
+ "loss": 1.1526,
39
  "step": 125
40
  },
41
  {
42
+ "epoch": 3.06,
43
+ "learning_rate": 2.9800000000000003e-06,
44
+ "loss": 0.9975,
45
  "step": 150
46
  },
47
  {
48
+ "epoch": 3.57,
49
+ "learning_rate": 3.48e-06,
50
+ "loss": 0.797,
51
  "step": 175
52
  },
53
  {
54
+ "epoch": 4.08,
55
+ "learning_rate": 3.980000000000001e-06,
56
+ "loss": 0.6751,
57
  "step": 200
58
  },
59
  {
60
+ "epoch": 4.59,
61
+ "learning_rate": 4.48e-06,
62
+ "loss": 0.5246,
63
  "step": 225
64
  },
65
  {
66
+ "epoch": 5.1,
67
+ "learning_rate": 4.980000000000001e-06,
68
+ "loss": 0.4666,
69
  "step": 250
70
  },
71
  {
72
+ "epoch": 5.61,
73
+ "learning_rate": 5.480000000000001e-06,
74
+ "loss": 0.3392,
75
  "step": 275
76
  },
77
  {
78
+ "epoch": 6.12,
79
+ "learning_rate": 5.98e-06,
80
+ "loss": 0.2899,
81
  "step": 300
82
  },
83
  {
84
+ "epoch": 6.63,
85
+ "learning_rate": 6.480000000000001e-06,
86
+ "loss": 0.1993,
87
  "step": 325
88
  },
89
  {
90
+ "epoch": 7.14,
91
+ "learning_rate": 6.98e-06,
92
+ "loss": 0.1636,
93
  "step": 350
94
  },
95
  {
96
+ "epoch": 7.65,
97
+ "learning_rate": 7.48e-06,
98
+ "loss": 0.1039,
99
  "step": 375
100
  },
101
  {
102
+ "epoch": 8.16,
103
+ "learning_rate": 7.980000000000002e-06,
104
+ "loss": 0.09,
105
  "step": 400
106
  },
107
  {
108
+ "epoch": 8.16,
109
+ "eval_loss": 0.857236921787262,
110
+ "eval_runtime": 149.0293,
111
+ "eval_samples_per_second": 6.207,
112
+ "eval_steps_per_second": 0.389,
113
+ "eval_wer": 50.6875,
114
  "step": 400
115
  },
116
  {
117
+ "epoch": 8.67,
118
+ "learning_rate": 8.48e-06,
119
+ "loss": 0.0536,
120
  "step": 425
121
  },
122
  {
123
+ "epoch": 9.18,
124
+ "learning_rate": 8.98e-06,
125
+ "loss": 0.0403,
126
  "step": 450
127
  },
128
  {
129
+ "epoch": 9.69,
130
+ "learning_rate": 9.48e-06,
131
+ "loss": 0.0278,
132
  "step": 475
133
  },
134
  {
135
+ "epoch": 10.2,
136
+ "learning_rate": 9.980000000000001e-06,
137
+ "loss": 0.0204,
138
  "step": 500
139
  },
140
  {
141
+ "epoch": 10.71,
142
+ "learning_rate": 9.84e-06,
143
+ "loss": 0.0131,
144
  "step": 525
145
  },
146
  {
147
+ "epoch": 11.22,
148
+ "learning_rate": 9.673333333333334e-06,
149
+ "loss": 0.0117,
150
  "step": 550
151
  },
152
  {
153
+ "epoch": 11.73,
154
+ "learning_rate": 9.506666666666667e-06,
155
+ "loss": 0.0083,
156
  "step": 575
157
  },
158
  {
159
+ "epoch": 12.24,
160
+ "learning_rate": 9.340000000000002e-06,
161
+ "loss": 0.0069,
162
  "step": 600
163
  },
164
  {
165
+ "epoch": 12.76,
166
+ "learning_rate": 9.173333333333334e-06,
167
+ "loss": 0.0072,
168
  "step": 625
169
  },
170
  {
171
+ "epoch": 13.27,
172
+ "learning_rate": 9.006666666666666e-06,
173
+ "loss": 0.006,
174
  "step": 650
175
  },
176
  {
177
+ "epoch": 13.78,
178
+ "learning_rate": 8.84e-06,
179
+ "loss": 0.0047,
180
  "step": 675
181
  },
182
  {
183
+ "epoch": 14.29,
184
+ "learning_rate": 8.673333333333334e-06,
185
+ "loss": 0.0034,
186
  "step": 700
187
  },
188
  {
189
+ "epoch": 14.8,
190
+ "learning_rate": 8.506666666666668e-06,
191
+ "loss": 0.0029,
192
  "step": 725
193
  },
194
  {
195
+ "epoch": 15.31,
196
+ "learning_rate": 8.34e-06,
197
+ "loss": 0.003,
198
  "step": 750
199
  },
200
  {
201
+ "epoch": 15.82,
202
+ "learning_rate": 8.173333333333334e-06,
203
+ "loss": 0.0021,
204
  "step": 775
205
  },
206
  {
207
+ "epoch": 16.33,
208
+ "learning_rate": 8.006666666666667e-06,
209
+ "loss": 0.0019,
210
  "step": 800
211
  },
212
  {
213
+ "epoch": 16.33,
214
+ "eval_loss": 0.9069271087646484,
215
+ "eval_runtime": 147.3435,
216
+ "eval_samples_per_second": 6.278,
217
+ "eval_steps_per_second": 0.394,
218
+ "eval_wer": 46.44375,
219
  "step": 800
220
  },
221
  {
222
+ "epoch": 16.84,
223
+ "learning_rate": 7.840000000000001e-06,
224
+ "loss": 0.0019,
225
  "step": 825
226
  },
227
  {
228
+ "epoch": 17.35,
229
+ "learning_rate": 7.673333333333333e-06,
230
+ "loss": 0.0019,
231
  "step": 850
232
  },
233
  {
234
+ "epoch": 17.86,
235
+ "learning_rate": 7.506666666666668e-06,
236
+ "loss": 0.0016,
237
  "step": 875
238
  },
239
  {
240
+ "epoch": 18.37,
241
+ "learning_rate": 7.340000000000001e-06,
242
+ "loss": 0.0018,
243
  "step": 900
244
  },
245
  {
246
+ "epoch": 18.88,
247
+ "learning_rate": 7.173333333333335e-06,
248
+ "loss": 0.0014,
249
  "step": 925
250
  },
251
  {
252
+ "epoch": 19.39,
253
+ "learning_rate": 7.006666666666667e-06,
254
+ "loss": 0.0012,
255
  "step": 950
256
  },
257
  {
258
+ "epoch": 19.9,
259
+ "learning_rate": 6.8400000000000014e-06,
260
+ "loss": 0.0013,
261
  "step": 975
262
  },
263
  {
264
+ "epoch": 20.41,
265
+ "learning_rate": 6.6733333333333335e-06,
266
+ "loss": 0.0012,
267
  "step": 1000
268
  },
269
  {
270
+ "epoch": 20.92,
271
+ "learning_rate": 6.5066666666666665e-06,
272
+ "loss": 0.0011,
273
  "step": 1025
274
  },
275
  {
276
+ "epoch": 21.43,
277
+ "learning_rate": 6.34e-06,
278
+ "loss": 0.001,
279
  "step": 1050
280
  },
281
  {
282
+ "epoch": 21.94,
283
+ "learning_rate": 6.173333333333333e-06,
284
+ "loss": 0.001,
285
  "step": 1075
286
  },
287
  {
288
+ "epoch": 22.45,
289
+ "learning_rate": 6.006666666666667e-06,
290
+ "loss": 0.0009,
291
  "step": 1100
292
  },
293
  {
294
+ "epoch": 22.96,
295
+ "learning_rate": 5.84e-06,
296
+ "loss": 0.0009,
297
  "step": 1125
298
  },
299
  {
300
+ "epoch": 23.47,
301
+ "learning_rate": 5.673333333333334e-06,
302
+ "loss": 0.0009,
303
  "step": 1150
304
  },
305
  {
306
+ "epoch": 23.98,
307
+ "learning_rate": 5.506666666666667e-06,
308
+ "loss": 0.0008,
309
  "step": 1175
310
  },
311
  {
312
+ "epoch": 24.49,
313
+ "learning_rate": 5.3400000000000005e-06,
314
+ "loss": 0.0008,
315
  "step": 1200
316
  },
317
  {
318
+ "epoch": 24.49,
319
+ "eval_loss": 0.9473171830177307,
320
+ "eval_runtime": 148.8306,
321
+ "eval_samples_per_second": 6.215,
322
+ "eval_steps_per_second": 0.39,
323
+ "eval_wer": 46.04375,
324
  "step": 1200
325
  },
326
  {
327
+ "epoch": 25.0,
328
+ "learning_rate": 5.1733333333333335e-06,
329
+ "loss": 0.0008,
330
  "step": 1225
331
  },
332
  {
333
+ "epoch": 25.51,
334
+ "learning_rate": 5.006666666666667e-06,
335
+ "loss": 0.0008,
336
  "step": 1250
337
  },
338
  {
339
+ "epoch": 26.02,
340
+ "learning_rate": 4.84e-06,
341
+ "loss": 0.0007,
342
  "step": 1275
343
  },
344
  {
345
+ "epoch": 26.53,
346
+ "learning_rate": 4.673333333333333e-06,
347
+ "loss": 0.0007,
348
  "step": 1300
349
  },
350
  {
351
+ "epoch": 27.04,
352
+ "learning_rate": 4.506666666666667e-06,
353
+ "loss": 0.0007,
354
  "step": 1325
355
  },
356
  {
357
+ "epoch": 27.55,
358
+ "learning_rate": 4.34e-06,
359
+ "loss": 0.0007,
360
  "step": 1350
361
  },
362
  {
363
+ "epoch": 28.06,
364
+ "learning_rate": 4.173333333333334e-06,
365
+ "loss": 0.0007,
366
  "step": 1375
367
  },
368
  {
369
+ "epoch": 28.57,
370
+ "learning_rate": 4.006666666666667e-06,
371
+ "loss": 0.0006,
372
  "step": 1400
373
  },
374
  {
375
+ "epoch": 29.08,
376
+ "learning_rate": 3.8400000000000005e-06,
377
+ "loss": 0.0007,
378
  "step": 1425
379
  },
380
  {
381
+ "epoch": 29.59,
382
+ "learning_rate": 3.673333333333334e-06,
383
+ "loss": 0.0006,
384
  "step": 1450
385
  },
386
  {
387
+ "epoch": 30.1,
388
+ "learning_rate": 3.5066666666666673e-06,
389
+ "loss": 0.0006,
390
  "step": 1475
391
  },
392
  {
393
+ "epoch": 30.61,
394
+ "learning_rate": 3.3400000000000006e-06,
395
+ "loss": 0.0006,
396
  "step": 1500
397
  },
398
  {
399
+ "epoch": 31.12,
400
+ "learning_rate": 3.173333333333334e-06,
401
+ "loss": 0.0006,
402
  "step": 1525
403
  },
404
  {
405
+ "epoch": 31.63,
406
+ "learning_rate": 3.0066666666666674e-06,
407
+ "loss": 0.0006,
408
  "step": 1550
409
  },
410
  {
411
+ "epoch": 32.14,
412
+ "learning_rate": 2.84e-06,
413
+ "loss": 0.0006,
414
  "step": 1575
415
  },
416
  {
417
+ "epoch": 32.65,
418
+ "learning_rate": 2.6733333333333333e-06,
419
+ "loss": 0.0006,
420
  "step": 1600
421
  },
422
  {
423
+ "epoch": 32.65,
424
+ "eval_loss": 0.965289831161499,
425
+ "eval_runtime": 147.3092,
426
+ "eval_samples_per_second": 6.279,
427
+ "eval_steps_per_second": 0.394,
428
+ "eval_wer": 45.9,
429
  "step": 1600
430
  },
431
  {
432
+ "epoch": 33.16,
433
+ "learning_rate": 2.5066666666666667e-06,
434
+ "loss": 0.0006,
435
  "step": 1625
436
  },
437
  {
438
+ "epoch": 33.67,
439
+ "learning_rate": 2.3400000000000005e-06,
440
+ "loss": 0.0005,
441
  "step": 1650
442
  },
443
  {
444
+ "epoch": 34.18,
445
+ "learning_rate": 2.1733333333333334e-06,
446
+ "loss": 0.0006,
447
  "step": 1675
448
  },
449
  {
450
+ "epoch": 34.69,
451
+ "learning_rate": 2.006666666666667e-06,
452
+ "loss": 0.0005,
453
  "step": 1700
454
  },
455
  {
456
+ "epoch": 35.2,
457
+ "learning_rate": 1.8400000000000002e-06,
458
+ "loss": 0.0006,
459
  "step": 1725
460
  },
461
  {
462
+ "epoch": 35.71,
463
+ "learning_rate": 1.6733333333333335e-06,
464
+ "loss": 0.0005,
465
  "step": 1750
466
  },
467
  {
468
+ "epoch": 36.22,
469
+ "learning_rate": 1.506666666666667e-06,
470
+ "loss": 0.0005,
471
  "step": 1775
472
  },
473
  {
474
+ "epoch": 36.73,
475
+ "learning_rate": 1.34e-06,
476
+ "loss": 0.0005,
477
  "step": 1800
478
  },
479
  {
480
+ "epoch": 37.24,
481
+ "learning_rate": 1.1733333333333335e-06,
482
+ "loss": 0.0005,
483
  "step": 1825
484
  },
485
  {
486
+ "epoch": 37.76,
487
+ "learning_rate": 1.0066666666666668e-06,
488
+ "loss": 0.0005,
489
  "step": 1850
490
  },
491
  {
492
+ "epoch": 38.27,
493
+ "learning_rate": 8.400000000000001e-07,
494
+ "loss": 0.0005,
495
  "step": 1875
496
  },
497
  {
498
+ "epoch": 38.78,
499
+ "learning_rate": 6.733333333333334e-07,
500
+ "loss": 0.0005,
501
  "step": 1900
502
  },
503
  {
504
+ "epoch": 39.29,
505
+ "learning_rate": 5.066666666666667e-07,
506
+ "loss": 0.0005,
507
  "step": 1925
508
  },
509
  {
510
+ "epoch": 39.8,
511
+ "learning_rate": 3.4000000000000003e-07,
512
+ "loss": 0.0005,
513
  "step": 1950
514
  },
515
  {
516
+ "epoch": 40.31,
517
+ "learning_rate": 1.7333333333333335e-07,
518
+ "loss": 0.0005,
519
  "step": 1975
520
  },
521
  {
522
+ "epoch": 40.82,
523
+ "learning_rate": 6.666666666666667e-09,
524
+ "loss": 0.0005,
525
  "step": 2000
526
  },
527
  {
528
+ "epoch": 40.82,
529
+ "eval_loss": 0.9719672799110413,
530
+ "eval_runtime": 147.2827,
531
+ "eval_samples_per_second": 6.28,
532
+ "eval_steps_per_second": 0.394,
533
+ "eval_wer": 45.94375,
534
  "step": 2000
535
  },
536
  {
537
+ "epoch": 40.82,
538
  "step": 2000,
539
+ "total_flos": 9.188559230976655e+18,
540
+ "train_loss": 0.18361923832818866,
541
+ "train_runtime": 4653.6924,
542
+ "train_samples_per_second": 6.876,
543
+ "train_steps_per_second": 0.43
544
  }
545
  ],
546
+ "logging_steps": 25,
547
  "max_steps": 2000,
548
+ "num_input_tokens_seen": 0,
549
+ "num_train_epochs": 41,
550
+ "save_steps": 400,
551
+ "total_flos": 9.188559230976655e+18,
552
+ "train_batch_size": 8,
553
  "trial_name": null,
554
  "trial_params": null
555
  }