DewiBrynJones committed on
Commit
2aa4d93
1 Parent(s): edfd598

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.4942
21
  - Wer: 0.3917
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.4942
23
  - Wer: 0.3917
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 2.0161290322580645,
3
- "eval_loss": 1.8412970304489136,
4
- "eval_runtime": 190.5883,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 36.844,
7
- "eval_steps_per_second": 0.577,
8
- "eval_wer": 0.9956417172696953,
9
- "total_flos": 1.3379687773166643e+19,
10
- "train_loss": 1.3496905517578126,
11
- "train_runtime": 9965.7858,
12
  "train_samples": 47607,
13
- "train_samples_per_second": 9.633,
14
- "train_steps_per_second": 0.602
15
  }
 
1
  {
2
+ "epoch": 1.0080645161290323,
3
+ "eval_loss": 0.4942198395729065,
4
+ "eval_runtime": 177.2206,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 39.623,
7
+ "eval_steps_per_second": 2.477,
8
+ "eval_wer": 0.3917272352808302,
9
+ "total_flos": 6.709869296482936e+18,
10
+ "train_loss": 1.1787635701497396,
11
+ "train_runtime": 4443.9077,
12
  "train_samples": 47607,
13
+ "train_samples_per_second": 10.801,
14
+ "train_steps_per_second": 0.675
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.0161290322580645,
3
- "eval_loss": 1.8412970304489136,
4
- "eval_runtime": 190.5883,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 36.844,
7
- "eval_steps_per_second": 0.577,
8
- "eval_wer": 0.9956417172696953
9
  }
 
1
  {
2
+ "epoch": 1.0080645161290323,
3
+ "eval_loss": 0.4942198395729065,
4
+ "eval_runtime": 177.2206,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 39.623,
7
+ "eval_steps_per_second": 2.477,
8
+ "eval_wer": 0.3917272352808302
9
  }
runs/Aug29_18-19-07_6d77da852b30/events.out.tfevents.1724957478.6d77da852b30.1440.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bcfb656ce16f70f93fd347aed2079d1fe51fb54b44d46b0ebc76ad7d88eba1e
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.0161290322580645,
3
- "total_flos": 1.3379687773166643e+19,
4
- "train_loss": 1.3496905517578126,
5
- "train_runtime": 9965.7858,
6
  "train_samples": 47607,
7
- "train_samples_per_second": 9.633,
8
- "train_steps_per_second": 0.602
9
  }
 
1
  {
2
+ "epoch": 1.0080645161290323,
3
+ "total_flos": 6.709869296482936e+18,
4
+ "train_loss": 1.1787635701497396,
5
+ "train_runtime": 4443.9077,
6
  "train_samples": 47607,
7
+ "train_samples_per_second": 10.801,
8
+ "train_steps_per_second": 0.675
9
  }
trainer_state.json CHANGED
@@ -1,381 +1,204 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0161290322580645,
5
  "eval_steps": 200,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.06720430107526881,
13
- "eval_loss": 3.080493211746216,
14
- "eval_runtime": 198.5178,
15
- "eval_samples_per_second": 35.372,
16
- "eval_steps_per_second": 0.554,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
  "epoch": 0.13440860215053763,
22
- "eval_loss": 2.92264461517334,
23
- "eval_runtime": 201.3326,
24
- "eval_samples_per_second": 34.878,
25
- "eval_steps_per_second": 0.546,
26
- "eval_wer": 1.0,
27
  "step": 400
28
  },
29
  {
30
  "epoch": 0.16801075268817203,
31
- "grad_norm": 5.131893634796143,
32
- "learning_rate": 0.00024599999999999996,
33
- "loss": 4.4646,
34
  "step": 500
35
  },
36
  {
37
  "epoch": 0.20161290322580644,
38
- "eval_loss": 1.3392444849014282,
39
- "eval_runtime": 198.1589,
40
- "eval_samples_per_second": 35.436,
41
- "eval_steps_per_second": 0.555,
42
- "eval_wer": 0.8646141989662366,
43
  "step": 600
44
  },
45
  {
46
  "epoch": 0.26881720430107525,
47
- "eval_loss": 1.0115001201629639,
48
- "eval_runtime": 200.6717,
49
- "eval_samples_per_second": 34.992,
50
- "eval_steps_per_second": 0.548,
51
- "eval_wer": 0.7168179221089837,
52
  "step": 800
53
  },
54
  {
55
  "epoch": 0.33602150537634407,
56
- "grad_norm": 6.313941478729248,
57
- "learning_rate": 0.0002782777777777778,
58
- "loss": 1.1042,
59
  "step": 1000
60
  },
61
  {
62
  "epoch": 0.33602150537634407,
63
- "eval_loss": 0.9008844494819641,
64
- "eval_runtime": 199.6418,
65
- "eval_samples_per_second": 35.173,
66
- "eval_steps_per_second": 0.551,
67
- "eval_wer": 0.6766765436691957,
68
  "step": 1000
69
  },
70
  {
71
  "epoch": 0.4032258064516129,
72
- "eval_loss": 0.8225907683372498,
73
- "eval_runtime": 199.7483,
74
- "eval_samples_per_second": 35.154,
75
- "eval_steps_per_second": 0.551,
76
- "eval_wer": 0.5947727182131041,
77
  "step": 1200
78
  },
79
  {
80
  "epoch": 0.47043010752688175,
81
- "eval_loss": 0.8068214058876038,
82
- "eval_runtime": 197.9234,
83
- "eval_samples_per_second": 35.478,
84
- "eval_steps_per_second": 0.556,
85
- "eval_wer": 0.5977358189718174,
86
  "step": 1400
87
  },
88
  {
89
  "epoch": 0.5040322580645161,
90
- "grad_norm": 8.026273727416992,
91
- "learning_rate": 0.0002505555555555555,
92
- "loss": 0.8421,
93
  "step": 1500
94
  },
95
  {
96
  "epoch": 0.5376344086021505,
97
- "eval_loss": 0.7186344861984253,
98
- "eval_runtime": 197.8905,
99
- "eval_samples_per_second": 35.484,
100
- "eval_steps_per_second": 0.556,
101
- "eval_wer": 0.5375171075884612,
102
  "step": 1600
103
  },
104
  {
105
  "epoch": 0.6048387096774194,
106
- "eval_loss": 0.7200700640678406,
107
- "eval_runtime": 197.0513,
108
- "eval_samples_per_second": 35.635,
109
- "eval_steps_per_second": 0.558,
110
- "eval_wer": 0.5560796715343016,
111
  "step": 1800
112
  },
113
  {
114
  "epoch": 0.6720430107526881,
115
- "grad_norm": 4.496196746826172,
116
- "learning_rate": 0.00022283333333333332,
117
- "loss": 0.7637,
118
  "step": 2000
119
  },
120
  {
121
  "epoch": 0.6720430107526881,
122
- "eval_loss": 0.6747044920921326,
123
- "eval_runtime": 198.0259,
124
- "eval_samples_per_second": 35.46,
125
- "eval_steps_per_second": 0.555,
126
- "eval_wer": 0.5086966342895867,
127
  "step": 2000
128
  },
129
  {
130
  "epoch": 0.739247311827957,
131
- "eval_loss": 0.6709386706352234,
132
- "eval_runtime": 199.0402,
133
- "eval_samples_per_second": 35.279,
134
- "eval_steps_per_second": 0.553,
135
- "eval_wer": 0.5050558737161004,
136
  "step": 2200
137
  },
138
  {
139
  "epoch": 0.8064516129032258,
140
- "eval_loss": 0.6484177708625793,
141
- "eval_runtime": 193.7002,
142
- "eval_samples_per_second": 36.252,
143
- "eval_steps_per_second": 0.568,
144
- "eval_wer": 0.48504497800927465,
145
  "step": 2400
146
  },
147
  {
148
  "epoch": 0.8400537634408602,
149
- "grad_norm": 4.925416469573975,
150
- "learning_rate": 0.00019516666666666665,
151
- "loss": 0.7134,
152
  "step": 2500
153
  },
154
  {
155
  "epoch": 0.8736559139784946,
156
- "eval_loss": 0.6387702226638794,
157
- "eval_runtime": 191.5475,
158
- "eval_samples_per_second": 36.659,
159
- "eval_steps_per_second": 0.574,
160
- "eval_wer": 0.4915159648679892,
161
  "step": 2600
162
  },
163
  {
164
  "epoch": 0.9408602150537635,
165
- "eval_loss": 0.627938985824585,
166
- "eval_runtime": 192.2841,
167
- "eval_samples_per_second": 36.519,
168
- "eval_steps_per_second": 0.572,
169
- "eval_wer": 0.4705217980573752,
170
  "step": 2800
171
  },
172
  {
173
  "epoch": 1.0080645161290323,
174
- "grad_norm": 2.991081714630127,
175
- "learning_rate": 0.00016738888888888888,
176
- "loss": 0.6968,
177
  "step": 3000
178
  },
179
  {
180
  "epoch": 1.0080645161290323,
181
- "eval_loss": 0.6451985836029053,
182
- "eval_runtime": 192.2109,
183
- "eval_samples_per_second": 36.533,
184
- "eval_steps_per_second": 0.572,
185
- "eval_wer": 0.4833441847486679,
186
  "step": 3000
187
  },
188
  {
189
- "epoch": 1.075268817204301,
190
- "eval_loss": 0.6439228057861328,
191
- "eval_runtime": 191.7283,
192
- "eval_samples_per_second": 36.625,
193
- "eval_steps_per_second": 0.574,
194
- "eval_wer": 0.4896955845812461,
195
- "step": 3200
196
- },
197
- {
198
- "epoch": 1.14247311827957,
199
- "eval_loss": 0.6749800443649292,
200
- "eval_runtime": 191.0444,
201
- "eval_samples_per_second": 36.756,
202
- "eval_steps_per_second": 0.576,
203
- "eval_wer": 0.5010164897221595,
204
- "step": 3400
205
- },
206
- {
207
- "epoch": 1.1760752688172043,
208
- "grad_norm": 17.283771514892578,
209
- "learning_rate": 0.00013977777777777776,
210
- "loss": 0.667,
211
- "step": 3500
212
- },
213
- {
214
- "epoch": 1.2096774193548387,
215
- "eval_loss": 0.7904173135757446,
216
- "eval_runtime": 191.7132,
217
- "eval_samples_per_second": 36.628,
218
- "eval_steps_per_second": 0.574,
219
- "eval_wer": 0.5591756467664997,
220
- "step": 3600
221
- },
222
- {
223
- "epoch": 1.2768817204301075,
224
- "eval_loss": 0.9479840993881226,
225
- "eval_runtime": 192.8834,
226
- "eval_samples_per_second": 36.405,
227
- "eval_steps_per_second": 0.57,
228
- "eval_wer": 0.5800502265509773,
229
- "step": 3800
230
- },
231
- {
232
- "epoch": 1.3440860215053765,
233
- "grad_norm": 26.34053611755371,
234
- "learning_rate": 0.0001121111111111111,
235
- "loss": 0.9326,
236
- "step": 4000
237
- },
238
- {
239
- "epoch": 1.3440860215053765,
240
- "eval_loss": 1.1457170248031616,
241
- "eval_runtime": 189.9612,
242
- "eval_samples_per_second": 36.965,
243
- "eval_steps_per_second": 0.579,
244
- "eval_wer": 0.6581804169600978,
245
- "step": 4000
246
- },
247
- {
248
- "epoch": 1.4112903225806452,
249
- "eval_loss": 1.118348479270935,
250
- "eval_runtime": 190.0092,
251
- "eval_samples_per_second": 36.956,
252
- "eval_steps_per_second": 0.579,
253
- "eval_wer": 0.6742183659097251,
254
- "step": 4200
255
- },
256
- {
257
- "epoch": 1.478494623655914,
258
- "eval_loss": 1.3061094284057617,
259
- "eval_runtime": 190.4579,
260
- "eval_samples_per_second": 36.869,
261
- "eval_steps_per_second": 0.578,
262
- "eval_wer": 0.78073054385522,
263
- "step": 4400
264
- },
265
- {
266
- "epoch": 1.5120967741935485,
267
- "grad_norm": 15.129014015197754,
268
- "learning_rate": 8.433333333333331e-05,
269
- "loss": 1.2308,
270
- "step": 4500
271
- },
272
- {
273
- "epoch": 1.5456989247311828,
274
- "eval_loss": 1.2298626899719238,
275
- "eval_runtime": 190.6991,
276
- "eval_samples_per_second": 36.822,
277
- "eval_steps_per_second": 0.577,
278
- "eval_wer": 0.7672304973491543,
279
- "step": 4600
280
- },
281
- {
282
- "epoch": 1.6129032258064515,
283
- "eval_loss": 1.178423523902893,
284
- "eval_runtime": 190.1804,
285
- "eval_samples_per_second": 36.923,
286
- "eval_steps_per_second": 0.578,
287
- "eval_wer": 0.7516974713987696,
288
- "step": 4800
289
- },
290
- {
291
- "epoch": 1.6801075268817205,
292
- "grad_norm": 3.6261146068573,
293
- "learning_rate": 5.6555555555555555e-05,
294
- "loss": 1.2835,
295
- "step": 5000
296
- },
297
- {
298
- "epoch": 1.6801075268817205,
299
- "eval_loss": 1.339849829673767,
300
- "eval_runtime": 189.6315,
301
- "eval_samples_per_second": 37.03,
302
- "eval_steps_per_second": 0.58,
303
- "eval_wer": 0.7625267409877888,
304
- "step": 5000
305
- },
306
- {
307
- "epoch": 1.7473118279569892,
308
- "eval_loss": 1.5754368305206299,
309
- "eval_runtime": 190.0111,
310
- "eval_samples_per_second": 36.956,
311
- "eval_steps_per_second": 0.579,
312
- "eval_wer": 0.895573951288218,
313
- "step": 5200
314
- },
315
- {
316
- "epoch": 1.814516129032258,
317
- "eval_loss": 1.728023886680603,
318
- "eval_runtime": 190.3066,
319
- "eval_samples_per_second": 36.898,
320
- "eval_steps_per_second": 0.578,
321
- "eval_wer": 0.9705550166757464,
322
- "step": 5400
323
- },
324
- {
325
- "epoch": 1.8481182795698925,
326
- "grad_norm": 4.430452346801758,
327
- "learning_rate": 2.8777777777777776e-05,
328
- "loss": 1.6466,
329
- "step": 5500
330
- },
331
- {
332
- "epoch": 1.881720430107527,
333
- "eval_loss": 1.7603241205215454,
334
- "eval_runtime": 190.0315,
335
- "eval_samples_per_second": 36.952,
336
- "eval_steps_per_second": 0.579,
337
- "eval_wer": 0.9727208705935503,
338
- "step": 5600
339
- },
340
- {
341
- "epoch": 1.9489247311827957,
342
- "eval_loss": 1.8200371265411377,
343
- "eval_runtime": 190.0189,
344
- "eval_samples_per_second": 36.954,
345
- "eval_steps_per_second": 0.579,
346
- "eval_wer": 0.9943661223242403,
347
- "step": 5800
348
- },
349
- {
350
- "epoch": 2.0161290322580645,
351
- "grad_norm": 8.303234100341797,
352
- "learning_rate": 1.0555555555555555e-06,
353
- "loss": 1.8511,
354
- "step": 6000
355
- },
356
- {
357
- "epoch": 2.0161290322580645,
358
- "eval_loss": 1.8412970304489136,
359
- "eval_runtime": 189.9189,
360
- "eval_samples_per_second": 36.974,
361
- "eval_steps_per_second": 0.579,
362
- "eval_wer": 0.9956417172696953,
363
- "step": 6000
364
- },
365
- {
366
- "epoch": 2.0161290322580645,
367
- "step": 6000,
368
- "total_flos": 1.3379687773166643e+19,
369
- "train_loss": 1.3496905517578126,
370
- "train_runtime": 9965.7858,
371
- "train_samples_per_second": 9.633,
372
- "train_steps_per_second": 0.602
373
  }
374
  ],
375
  "logging_steps": 500,
376
- "max_steps": 6000,
377
  "num_input_tokens_seen": 0,
378
- "num_train_epochs": 3,
379
  "save_steps": 500,
380
  "stateful_callbacks": {
381
  "TrainerControl": {
@@ -389,7 +212,7 @@
389
  "attributes": {}
390
  }
391
  },
392
- "total_flos": 1.3379687773166643e+19,
393
  "train_batch_size": 16,
394
  "trial_name": null,
395
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0080645161290323,
5
  "eval_steps": 200,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.06720430107526881,
13
+ "eval_loss": 2.9829530715942383,
14
+ "eval_runtime": 176.5397,
15
+ "eval_samples_per_second": 39.776,
16
+ "eval_steps_per_second": 2.487,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
  "epoch": 0.13440860215053763,
22
+ "eval_loss": 1.5613375902175903,
23
+ "eval_runtime": 174.5758,
24
+ "eval_samples_per_second": 40.223,
25
+ "eval_steps_per_second": 2.515,
26
+ "eval_wer": 0.9657582481829415,
27
  "step": 400
28
  },
29
  {
30
  "epoch": 0.16801075268817203,
31
+ "grad_norm": 2.138428211212158,
32
+ "learning_rate": 0.00027833333333333334,
33
+ "loss": 3.6118,
34
  "step": 500
35
  },
36
  {
37
  "epoch": 0.20161290322580644,
38
+ "eval_loss": 1.0701438188552856,
39
+ "eval_runtime": 174.6767,
40
+ "eval_samples_per_second": 40.2,
41
+ "eval_steps_per_second": 2.513,
42
+ "eval_wer": 0.7648786191684716,
43
  "step": 600
44
  },
45
  {
46
  "epoch": 0.26881720430107525,
47
+ "eval_loss": 0.8867517113685608,
48
+ "eval_runtime": 174.8944,
49
+ "eval_samples_per_second": 40.15,
50
+ "eval_steps_per_second": 2.51,
51
+ "eval_wer": 0.6947474720631419,
52
  "step": 800
53
  },
54
  {
55
  "epoch": 0.33602150537634407,
56
+ "grad_norm": 2.852606773376465,
57
+ "learning_rate": 0.00022288888888888887,
58
+ "loss": 0.9333,
59
  "step": 1000
60
  },
61
  {
62
  "epoch": 0.33602150537634407,
63
+ "eval_loss": 0.7679557204246521,
64
+ "eval_runtime": 175.5538,
65
+ "eval_samples_per_second": 39.999,
66
+ "eval_steps_per_second": 2.501,
67
+ "eval_wer": 0.6070503195631087,
68
  "step": 1000
69
  },
70
  {
71
  "epoch": 0.4032258064516129,
72
+ "eval_loss": 0.7223904132843018,
73
+ "eval_runtime": 175.6961,
74
+ "eval_samples_per_second": 39.967,
75
+ "eval_steps_per_second": 2.499,
76
+ "eval_wer": 0.5453965638661157,
77
  "step": 1200
78
  },
79
  {
80
  "epoch": 0.47043010752688175,
81
+ "eval_loss": 0.6732765436172485,
82
+ "eval_runtime": 175.6064,
83
+ "eval_samples_per_second": 39.987,
84
+ "eval_steps_per_second": 2.5,
85
+ "eval_wer": 0.5121646580475425,
86
  "step": 1400
87
  },
88
  {
89
  "epoch": 0.5040322580645161,
90
+ "grad_norm": 4.027652740478516,
91
+ "learning_rate": 0.00016744444444444443,
92
+ "loss": 0.7446,
93
  "step": 1500
94
  },
95
  {
96
  "epoch": 0.5376344086021505,
97
+ "eval_loss": 0.6437448859214783,
98
+ "eval_runtime": 175.5304,
99
+ "eval_samples_per_second": 40.004,
100
+ "eval_steps_per_second": 2.501,
101
+ "eval_wer": 0.4966449195445063,
102
  "step": 1600
103
  },
104
  {
105
  "epoch": 0.6048387096774194,
106
+ "eval_loss": 0.6063565015792847,
107
+ "eval_runtime": 175.877,
108
+ "eval_samples_per_second": 39.926,
109
+ "eval_steps_per_second": 2.496,
110
+ "eval_wer": 0.4774312706785899,
111
  "step": 1800
112
  },
113
  {
114
  "epoch": 0.6720430107526881,
115
+ "grad_norm": 3.676745653152466,
116
+ "learning_rate": 0.000112,
117
+ "loss": 0.6579,
118
  "step": 2000
119
  },
120
  {
121
  "epoch": 0.6720430107526881,
122
+ "eval_loss": 0.5673760771751404,
123
+ "eval_runtime": 176.4608,
124
+ "eval_samples_per_second": 39.794,
125
+ "eval_steps_per_second": 2.488,
126
+ "eval_wer": 0.44613933217289625,
127
  "step": 2000
128
  },
129
  {
130
  "epoch": 0.739247311827957,
131
+ "eval_loss": 0.5556111931800842,
132
+ "eval_runtime": 175.9454,
133
+ "eval_samples_per_second": 39.91,
134
+ "eval_steps_per_second": 2.495,
135
+ "eval_wer": 0.4325196986406941,
136
  "step": 2200
137
  },
138
  {
139
  "epoch": 0.8064516129032258,
140
+ "eval_loss": 0.5264282822608948,
141
+ "eval_runtime": 176.4778,
142
+ "eval_samples_per_second": 39.79,
143
+ "eval_steps_per_second": 2.488,
144
+ "eval_wer": 0.4180363810308402,
145
  "step": 2400
146
  },
147
  {
148
  "epoch": 0.8400537634408602,
149
+ "grad_norm": 2.7548441886901855,
150
+ "learning_rate": 5.666666666666666e-05,
151
+ "loss": 0.5823,
152
  "step": 2500
153
  },
154
  {
155
  "epoch": 0.8736559139784946,
156
+ "eval_loss": 0.5129852890968323,
157
+ "eval_runtime": 176.2054,
158
+ "eval_samples_per_second": 39.851,
159
+ "eval_steps_per_second": 2.491,
160
+ "eval_wer": 0.4022110312387887,
161
  "step": 2600
162
  },
163
  {
164
  "epoch": 0.9408602150537635,
165
+ "eval_loss": 0.49821802973747253,
166
+ "eval_runtime": 176.2887,
167
+ "eval_samples_per_second": 39.832,
168
+ "eval_steps_per_second": 2.49,
169
+ "eval_wer": 0.39360076535696725,
170
  "step": 2800
171
  },
172
  {
173
  "epoch": 1.0080645161290323,
174
+ "grad_norm": 0.7031016945838928,
175
+ "learning_rate": 1.111111111111111e-06,
176
+ "loss": 0.5426,
177
  "step": 3000
178
  },
179
  {
180
  "epoch": 1.0080645161290323,
181
+ "eval_loss": 0.4942198395729065,
182
+ "eval_runtime": 177.2018,
183
+ "eval_samples_per_second": 39.627,
184
+ "eval_steps_per_second": 2.477,
185
+ "eval_wer": 0.3917272352808302,
186
  "step": 3000
187
  },
188
  {
189
+ "epoch": 1.0080645161290323,
190
+ "step": 3000,
191
+ "total_flos": 6.709869296482936e+18,
192
+ "train_loss": 1.1787635701497396,
193
+ "train_runtime": 4443.9077,
194
+ "train_samples_per_second": 10.801,
195
+ "train_steps_per_second": 0.675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  }
197
  ],
198
  "logging_steps": 500,
199
+ "max_steps": 3000,
200
  "num_input_tokens_seen": 0,
201
+ "num_train_epochs": 2,
202
  "save_steps": 500,
203
  "stateful_callbacks": {
204
  "TrainerControl": {
 
212
  "attributes": {}
213
  }
214
  },
215
+ "total_flos": 6.709869296482936e+18,
216
  "train_batch_size": 16,
217
  "trial_name": null,
218
  "trial_params": null