DewiBrynJones committed on
Commit
0d4cf0c
1 Parent(s): 35c973a

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.7821
21
  - Wer: 0.4576
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv dataset (default configuration).
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.7821
23
  - Wer: 0.4576
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 2.3166023166023164,
3
- "eval_loss": Infinity,
4
- "eval_runtime": 194.093,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 36.179,
7
- "eval_steps_per_second": 0.567,
8
- "eval_wer": 0.3263604845935102,
9
- "total_flos": 1.2607274134194512e+19,
10
- "train_loss": 0.8722912038167318,
11
- "train_runtime": 9671.7075,
12
- "train_samples": 41435,
13
- "train_samples_per_second": 9.926,
14
- "train_steps_per_second": 0.62
15
  }
 
1
  {
2
+ "epoch": 1.8933417481855475,
3
+ "eval_loss": 0.7821305990219116,
4
+ "eval_runtime": 188.4729,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.257,
7
+ "eval_steps_per_second": 0.584,
8
+ "eval_wer": 0.45755324944524906,
9
+ "total_flos": 1.3297699695003722e+19,
10
+ "train_loss": 0.9828314208984374,
11
+ "train_runtime": 9865.427,
12
+ "train_samples": 50697,
13
+ "train_samples_per_second": 9.731,
14
+ "train_steps_per_second": 0.608
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.3166023166023164,
3
- "eval_loss": Infinity,
4
- "eval_runtime": 194.093,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 36.179,
7
- "eval_steps_per_second": 0.567,
8
- "eval_wer": 0.3263604845935102
9
  }
 
1
  {
2
+ "epoch": 1.8933417481855475,
3
+ "eval_loss": 0.7821305990219116,
4
+ "eval_runtime": 188.4729,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.257,
7
+ "eval_steps_per_second": 0.584,
8
+ "eval_wer": 0.45755324944524906
9
  }
runs/Aug28_14-17-49_6d77da852b30/events.out.tfevents.1724862820.6d77da852b30.31.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e97d4393abe26b409f20689992f7f125f326b45b0281ff9f0aecefd61bd14ba
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 2.3166023166023164,
3
- "total_flos": 1.2607274134194512e+19,
4
- "train_loss": 0.8722912038167318,
5
- "train_runtime": 9671.7075,
6
- "train_samples": 41435,
7
- "train_samples_per_second": 9.926,
8
- "train_steps_per_second": 0.62
9
  }
 
1
  {
2
+ "epoch": 1.8933417481855475,
3
+ "total_flos": 1.3297699695003722e+19,
4
+ "train_loss": 0.9828314208984374,
5
+ "train_runtime": 9865.427,
6
+ "train_samples": 50697,
7
+ "train_samples_per_second": 9.731,
8
+ "train_steps_per_second": 0.608
9
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.3166023166023164,
5
  "eval_steps": 200,
6
  "global_step": 6000,
7
  "is_hyper_param_search": false,
@@ -9,373 +9,373 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.07722007722007722,
13
- "eval_loss": Infinity,
14
- "eval_runtime": 223.773,
15
- "eval_samples_per_second": 31.38,
16
- "eval_steps_per_second": 0.492,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
- "epoch": 0.15444015444015444,
22
- "eval_loss": Infinity,
23
- "eval_runtime": 201.0458,
24
- "eval_samples_per_second": 34.927,
25
- "eval_steps_per_second": 0.547,
26
- "eval_wer": 0.8963291911755158,
27
  "step": 400
28
  },
29
  {
30
- "epoch": 0.19305019305019305,
31
- "grad_norm": 4.3846588134765625,
32
- "learning_rate": 0.00024799999999999996,
33
- "loss": 3.9177,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 0.23166023166023167,
38
- "eval_loss": Infinity,
39
- "eval_runtime": 194.7647,
40
- "eval_samples_per_second": 36.054,
41
- "eval_steps_per_second": 0.565,
42
- "eval_wer": 0.7594821653487074,
43
  "step": 600
44
  },
45
  {
46
- "epoch": 0.3088803088803089,
47
- "eval_loss": Infinity,
48
- "eval_runtime": 194.859,
49
- "eval_samples_per_second": 36.036,
50
- "eval_steps_per_second": 0.565,
51
- "eval_wer": 0.7512108345344293,
52
  "step": 800
53
  },
54
  {
55
- "epoch": 0.3861003861003861,
56
- "grad_norm": 3.36423921585083,
57
- "learning_rate": 0.00027805555555555553,
58
- "loss": 0.9791,
59
  "step": 1000
60
  },
61
  {
62
- "epoch": 0.3861003861003861,
63
- "eval_loss": Infinity,
64
- "eval_runtime": 195.6019,
65
- "eval_samples_per_second": 35.899,
66
- "eval_steps_per_second": 0.562,
67
- "eval_wer": 0.598444418474803,
68
  "step": 1000
69
  },
70
  {
71
- "epoch": 0.46332046332046334,
72
- "eval_loss": Infinity,
73
- "eval_runtime": 197.8836,
74
- "eval_samples_per_second": 35.486,
75
- "eval_steps_per_second": 0.556,
76
- "eval_wer": 0.5867710646254528,
77
  "step": 1200
78
  },
79
  {
80
- "epoch": 0.5405405405405406,
81
- "eval_loss": Infinity,
82
- "eval_runtime": 203.8782,
83
- "eval_samples_per_second": 34.442,
84
- "eval_steps_per_second": 0.54,
85
- "eval_wer": 0.5255476348149014,
86
  "step": 1400
87
  },
88
  {
89
- "epoch": 0.5791505791505791,
90
- "grad_norm": 2.6850786209106445,
91
- "learning_rate": 0.0002503333333333333,
92
- "loss": 0.805,
93
  "step": 1500
94
  },
95
  {
96
- "epoch": 0.6177606177606177,
97
- "eval_loss": Infinity,
98
- "eval_runtime": 199.2388,
99
- "eval_samples_per_second": 35.244,
100
- "eval_steps_per_second": 0.552,
101
- "eval_wer": 0.5281575837845559,
102
  "step": 1600
103
  },
104
  {
105
- "epoch": 0.694980694980695,
106
- "eval_loss": Infinity,
107
- "eval_runtime": 194.9574,
108
- "eval_samples_per_second": 36.018,
109
- "eval_steps_per_second": 0.564,
110
- "eval_wer": 0.4768805266643294,
111
  "step": 1800
112
  },
113
  {
114
- "epoch": 0.7722007722007722,
115
- "grad_norm": 2.9242658615112305,
116
- "learning_rate": 0.0002226111111111111,
117
- "loss": 0.7184,
118
  "step": 2000
119
  },
120
  {
121
- "epoch": 0.7722007722007722,
122
- "eval_loss": Infinity,
123
- "eval_runtime": 198.941,
124
- "eval_samples_per_second": 35.297,
125
- "eval_steps_per_second": 0.553,
126
- "eval_wer": 0.4743095321569086,
127
  "step": 2000
128
  },
129
  {
130
- "epoch": 0.8494208494208494,
131
- "eval_loss": Infinity,
132
- "eval_runtime": 207.8762,
133
- "eval_samples_per_second": 33.78,
134
- "eval_steps_per_second": 0.529,
135
- "eval_wer": 0.46802487891654654,
136
  "step": 2200
137
  },
138
  {
139
- "epoch": 0.9266409266409267,
140
- "eval_loss": Infinity,
141
- "eval_runtime": 215.534,
142
- "eval_samples_per_second": 32.58,
143
- "eval_steps_per_second": 0.51,
144
- "eval_wer": 0.457026735745913,
145
  "step": 2400
146
  },
147
  {
148
- "epoch": 0.9652509652509652,
149
- "grad_norm": 3.865280866622925,
150
- "learning_rate": 0.00019483333333333332,
151
- "loss": 0.6704,
152
  "step": 2500
153
  },
154
  {
155
- "epoch": 1.0038610038610039,
156
- "eval_loss": Infinity,
157
- "eval_runtime": 212.4212,
158
- "eval_samples_per_second": 33.057,
159
- "eval_steps_per_second": 0.518,
160
- "eval_wer": 0.4252528793840001,
161
  "step": 2600
162
  },
163
  {
164
- "epoch": 1.0810810810810811,
165
- "eval_loss": Infinity,
166
- "eval_runtime": 211.2545,
167
- "eval_samples_per_second": 33.24,
168
- "eval_steps_per_second": 0.521,
169
- "eval_wer": 0.4163972316362173,
170
  "step": 2800
171
  },
172
  {
173
- "epoch": 1.1583011583011582,
174
- "grad_norm": 0.6646206974983215,
175
- "learning_rate": 0.00016716666666666665,
176
- "loss": 0.5664,
177
  "step": 3000
178
  },
179
  {
180
- "epoch": 1.1583011583011582,
181
- "eval_loss": Infinity,
182
- "eval_runtime": 202.1583,
183
- "eval_samples_per_second": 34.735,
184
- "eval_steps_per_second": 0.544,
185
- "eval_wer": 0.41592977808941345,
186
  "step": 3000
187
  },
188
  {
189
- "epoch": 1.2355212355212355,
190
- "eval_loss": Infinity,
191
- "eval_runtime": 198.8952,
192
- "eval_samples_per_second": 35.305,
193
- "eval_steps_per_second": 0.553,
194
- "eval_wer": 0.3995039798475582,
195
  "step": 3200
196
  },
197
  {
198
- "epoch": 1.3127413127413128,
199
- "eval_loss": Infinity,
200
- "eval_runtime": 197.172,
201
- "eval_samples_per_second": 35.614,
202
- "eval_steps_per_second": 0.558,
203
- "eval_wer": 0.3940633399555919,
204
  "step": 3400
205
  },
206
  {
207
- "epoch": 1.3513513513513513,
208
- "grad_norm": 1.06748366355896,
209
- "learning_rate": 0.00013944444444444442,
210
- "loss": 0.5359,
211
  "step": 3500
212
  },
213
  {
214
- "epoch": 1.3899613899613898,
215
- "eval_loss": Infinity,
216
- "eval_runtime": 194.4658,
217
- "eval_samples_per_second": 36.109,
218
- "eval_steps_per_second": 0.566,
219
- "eval_wer": 0.38185760845571526,
220
  "step": 3600
221
  },
222
  {
223
- "epoch": 1.4671814671814671,
224
- "eval_loss": Infinity,
225
- "eval_runtime": 201.7117,
226
- "eval_samples_per_second": 34.812,
227
- "eval_steps_per_second": 0.545,
228
- "eval_wer": 0.3810785192110423,
229
  "step": 3800
230
  },
231
  {
232
- "epoch": 1.5444015444015444,
233
- "grad_norm": 0.8601678013801575,
234
- "learning_rate": 0.00011166666666666667,
235
- "loss": 0.5172,
236
  "step": 4000
237
  },
238
  {
239
- "epoch": 1.5444015444015444,
240
- "eval_loss": Infinity,
241
- "eval_runtime": 196.5164,
242
- "eval_samples_per_second": 35.732,
243
- "eval_steps_per_second": 0.56,
244
- "eval_wer": 0.36905457520158935,
245
  "step": 4000
246
  },
247
  {
248
- "epoch": 1.6216216216216215,
249
- "eval_loss": Infinity,
250
- "eval_runtime": 193.7969,
251
- "eval_samples_per_second": 36.234,
252
- "eval_steps_per_second": 0.568,
253
- "eval_wer": 0.36086115331177854,
254
  "step": 4200
255
  },
256
  {
257
- "epoch": 1.698841698841699,
258
- "eval_loss": Infinity,
259
- "eval_runtime": 196.3411,
260
- "eval_samples_per_second": 35.764,
261
- "eval_steps_per_second": 0.56,
262
- "eval_wer": 0.3599652006804046,
263
  "step": 4400
264
  },
265
  {
266
- "epoch": 1.7374517374517375,
267
- "grad_norm": 0.6527121663093567,
268
- "learning_rate": 8.394444444444443e-05,
269
- "loss": 0.4817,
270
  "step": 4500
271
  },
272
  {
273
- "epoch": 1.776061776061776,
274
- "eval_loss": Infinity,
275
- "eval_runtime": 194.384,
276
- "eval_samples_per_second": 36.124,
277
- "eval_steps_per_second": 0.566,
278
- "eval_wer": 0.35086284133847534,
279
  "step": 4600
280
  },
281
  {
282
- "epoch": 1.8532818532818531,
283
- "eval_loss": Infinity,
284
- "eval_runtime": 196.7828,
285
- "eval_samples_per_second": 35.684,
286
- "eval_steps_per_second": 0.559,
287
- "eval_wer": 0.3529663822990924,
288
  "step": 4800
289
  },
290
  {
291
- "epoch": 1.9305019305019306,
292
- "grad_norm": 0.7631692886352539,
293
- "learning_rate": 5.6166666666666665e-05,
294
- "loss": 0.4818,
295
  "step": 5000
296
  },
297
  {
298
- "epoch": 1.9305019305019306,
299
- "eval_loss": Infinity,
300
- "eval_runtime": 194.6791,
301
- "eval_samples_per_second": 36.07,
302
- "eval_steps_per_second": 0.565,
303
- "eval_wer": 0.34340955423110386,
304
  "step": 5000
305
  },
306
  {
307
- "epoch": 2.0077220077220077,
308
- "eval_loss": Infinity,
309
- "eval_runtime": 200.454,
310
- "eval_samples_per_second": 35.03,
311
- "eval_steps_per_second": 0.549,
312
- "eval_wer": 0.336254918000857,
313
  "step": 5200
314
  },
315
  {
316
- "epoch": 2.0849420849420848,
317
- "eval_loss": Infinity,
318
- "eval_runtime": 193.7233,
319
- "eval_samples_per_second": 36.248,
320
- "eval_steps_per_second": 0.568,
321
- "eval_wer": 0.33718982509446455,
322
  "step": 5400
323
  },
324
  {
325
- "epoch": 2.1235521235521237,
326
- "grad_norm": 1.005771279335022,
327
- "learning_rate": 2.844444444444444e-05,
328
- "loss": 0.4196,
329
  "step": 5500
330
  },
331
  {
332
- "epoch": 2.1621621621621623,
333
- "eval_loss": Infinity,
334
- "eval_runtime": 198.8274,
335
- "eval_samples_per_second": 35.317,
336
- "eval_steps_per_second": 0.553,
337
- "eval_wer": 0.3320348512588784,
338
  "step": 5600
339
  },
340
  {
341
- "epoch": 2.2393822393822393,
342
- "eval_loss": Infinity,
343
- "eval_runtime": 197.3434,
344
- "eval_samples_per_second": 35.583,
345
- "eval_steps_per_second": 0.557,
346
- "eval_wer": 0.3292690844402893,
347
  "step": 5800
348
  },
349
  {
350
- "epoch": 2.3166023166023164,
351
- "grad_norm": 1.702697515487671,
352
- "learning_rate": 7.222222222222222e-07,
353
- "loss": 0.3743,
354
  "step": 6000
355
  },
356
  {
357
- "epoch": 2.3166023166023164,
358
- "eval_loss": Infinity,
359
- "eval_runtime": 192.6595,
360
- "eval_samples_per_second": 36.448,
361
- "eval_steps_per_second": 0.571,
362
- "eval_wer": 0.3263604845935102,
363
  "step": 6000
364
  },
365
  {
366
- "epoch": 2.3166023166023164,
367
  "step": 6000,
368
- "total_flos": 1.2607274134194512e+19,
369
- "train_loss": 0.8722912038167318,
370
- "train_runtime": 9671.7075,
371
- "train_samples_per_second": 9.926,
372
- "train_steps_per_second": 0.62
373
  }
374
  ],
375
  "logging_steps": 500,
376
  "max_steps": 6000,
377
  "num_input_tokens_seen": 0,
378
- "num_train_epochs": 3,
379
  "save_steps": 500,
380
  "stateful_callbacks": {
381
  "TrainerControl": {
@@ -389,7 +389,7 @@
389
  "attributes": {}
390
  }
391
  },
392
- "total_flos": 1.2607274134194512e+19,
393
  "train_batch_size": 16,
394
  "trial_name": null,
395
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8933417481855475,
5
  "eval_steps": 200,
6
  "global_step": 6000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.06311139160618491,
13
+ "eval_loss": 3.0427980422973633,
14
+ "eval_runtime": 195.72,
15
+ "eval_samples_per_second": 35.878,
16
+ "eval_steps_per_second": 0.562,
17
  "eval_wer": 1.0,
18
  "step": 200
19
  },
20
  {
21
+ "epoch": 0.12622278321236982,
22
+ "eval_loss": 3.1055634021759033,
23
+ "eval_runtime": 193.3912,
24
+ "eval_samples_per_second": 36.31,
25
+ "eval_steps_per_second": 0.569,
26
+ "eval_wer": 1.0,
27
  "step": 400
28
  },
29
  {
30
+ "epoch": 0.1577784790154623,
31
+ "grad_norm": 6.020185947418213,
32
+ "learning_rate": 0.00024599999999999996,
33
+ "loss": 4.232,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 0.18933417481855475,
38
+ "eval_loss": 1.2093147039413452,
39
+ "eval_runtime": 194.3659,
40
+ "eval_samples_per_second": 36.128,
41
+ "eval_steps_per_second": 0.566,
42
+ "eval_wer": 0.8286185040991775,
43
  "step": 600
44
  },
45
  {
46
+ "epoch": 0.25244556642473964,
47
+ "eval_loss": 1.0886054039001465,
48
+ "eval_runtime": 194.9365,
49
+ "eval_samples_per_second": 36.022,
50
+ "eval_steps_per_second": 0.564,
51
+ "eval_wer": 0.7324838225328533,
52
  "step": 800
53
  },
54
  {
55
+ "epoch": 0.3155569580309246,
56
+ "grad_norm": 4.922729969024658,
57
+ "learning_rate": 0.00027822222222222224,
58
+ "loss": 1.0379,
59
  "step": 1000
60
  },
61
  {
62
+ "epoch": 0.3155569580309246,
63
+ "eval_loss": 0.9017586708068848,
64
+ "eval_runtime": 193.3111,
65
+ "eval_samples_per_second": 36.325,
66
+ "eval_steps_per_second": 0.569,
67
+ "eval_wer": 0.673049070543058,
68
  "step": 1000
69
  },
70
  {
71
+ "epoch": 0.3786683496371095,
72
+ "eval_loss": 0.8131064772605896,
73
+ "eval_runtime": 192.8235,
74
+ "eval_samples_per_second": 36.417,
75
+ "eval_steps_per_second": 0.57,
76
+ "eval_wer": 0.5803292629452956,
77
  "step": 1200
78
  },
79
  {
80
+ "epoch": 0.4417797412432944,
81
+ "eval_loss": 0.7567442655563354,
82
+ "eval_runtime": 192.8955,
83
+ "eval_samples_per_second": 36.403,
84
+ "eval_steps_per_second": 0.57,
85
+ "eval_wer": 0.5543788782736948,
86
  "step": 1400
87
  },
88
  {
89
+ "epoch": 0.47333543704638686,
90
+ "grad_norm": 25.353076934814453,
91
+ "learning_rate": 0.00025049999999999996,
92
+ "loss": 0.8008,
93
  "step": 1500
94
  },
95
  {
96
+ "epoch": 0.5048911328494793,
97
+ "eval_loss": 0.704021155834198,
98
+ "eval_runtime": 193.2182,
99
+ "eval_samples_per_second": 36.342,
100
+ "eval_steps_per_second": 0.569,
101
+ "eval_wer": 0.5137857266240583,
102
  "step": 1600
103
  },
104
  {
105
+ "epoch": 0.5680025244556642,
106
+ "eval_loss": 0.6949470043182373,
107
+ "eval_runtime": 192.7198,
108
+ "eval_samples_per_second": 36.436,
109
+ "eval_steps_per_second": 0.571,
110
+ "eval_wer": 0.5236450125566378,
111
  "step": 1800
112
  },
113
  {
114
+ "epoch": 0.6311139160618492,
115
+ "grad_norm": NaN,
116
+ "learning_rate": 0.00022277777777777774,
117
+ "loss": 0.7212,
118
  "step": 2000
119
  },
120
  {
121
+ "epoch": 0.6311139160618492,
122
+ "eval_loss": 0.672233521938324,
123
+ "eval_runtime": 194.6494,
124
+ "eval_samples_per_second": 36.075,
125
+ "eval_steps_per_second": 0.565,
126
+ "eval_wer": 0.4992758341195073,
127
  "step": 2000
128
  },
129
  {
130
+ "epoch": 0.694225307668034,
131
+ "eval_loss": 0.6403974890708923,
132
+ "eval_runtime": 192.2183,
133
+ "eval_samples_per_second": 36.531,
134
+ "eval_steps_per_second": 0.572,
135
+ "eval_wer": 0.4761689631804834,
136
  "step": 2200
137
  },
138
  {
139
+ "epoch": 0.757336699274219,
140
+ "eval_loss": 0.6335896849632263,
141
+ "eval_runtime": 192.0564,
142
+ "eval_samples_per_second": 36.562,
143
+ "eval_steps_per_second": 0.573,
144
+ "eval_wer": 0.4685552558497987,
145
  "step": 2400
146
  },
147
  {
148
+ "epoch": 0.7888923950773115,
149
+ "grad_norm": 5.390285968780518,
150
+ "learning_rate": 0.00019505555555555555,
151
+ "loss": 0.6639,
152
  "step": 2500
153
  },
154
  {
155
+ "epoch": 0.8204480908804039,
156
+ "eval_loss": 0.593280553817749,
157
+ "eval_runtime": 191.8655,
158
+ "eval_samples_per_second": 36.599,
159
+ "eval_steps_per_second": 0.573,
160
+ "eval_wer": 0.45866939502252224,
161
  "step": 2600
162
  },
163
  {
164
+ "epoch": 0.8835594824865888,
165
+ "eval_loss": 0.5996308326721191,
166
+ "eval_runtime": 192.1279,
167
+ "eval_samples_per_second": 36.549,
168
+ "eval_steps_per_second": 0.573,
169
+ "eval_wer": 0.45444398676570247,
170
  "step": 2800
171
  },
172
  {
173
+ "epoch": 0.9466708740927737,
174
+ "grad_norm": 7.231433391571045,
175
+ "learning_rate": 0.00016733333333333333,
176
+ "loss": 0.6278,
177
  "step": 3000
178
  },
179
  {
180
+ "epoch": 0.9466708740927737,
181
+ "eval_loss": 0.5639352202415466,
182
+ "eval_runtime": 195.2391,
183
+ "eval_samples_per_second": 35.966,
184
+ "eval_steps_per_second": 0.563,
185
+ "eval_wer": 0.42424161894258494,
186
  "step": 3000
187
  },
188
  {
189
+ "epoch": 1.0097822656989586,
190
+ "eval_loss": 0.566460371017456,
191
+ "eval_runtime": 191.9621,
192
+ "eval_samples_per_second": 36.58,
193
+ "eval_steps_per_second": 0.573,
194
+ "eval_wer": 0.4227534248395541,
195
  "step": 3200
196
  },
197
  {
198
+ "epoch": 1.0728936573051435,
199
+ "eval_loss": 0.5476272106170654,
200
+ "eval_runtime": 193.6396,
201
+ "eval_samples_per_second": 36.263,
202
+ "eval_steps_per_second": 0.568,
203
+ "eval_wer": 0.41909937681871934,
204
  "step": 3400
205
  },
206
  {
207
+ "epoch": 1.104449353108236,
208
+ "grad_norm": 4.519629955291748,
209
+ "learning_rate": 0.00013955555555555555,
210
+ "loss": 0.5528,
211
  "step": 3500
212
  },
213
  {
214
+ "epoch": 1.1360050489113285,
215
+ "eval_loss": 0.5440065860748291,
216
+ "eval_runtime": 191.7996,
217
+ "eval_samples_per_second": 36.611,
218
+ "eval_steps_per_second": 0.574,
219
+ "eval_wer": 0.41887349021379505,
220
  "step": 3600
221
  },
222
  {
223
+ "epoch": 1.1991164405175134,
224
+ "eval_loss": 0.5297770500183105,
225
+ "eval_runtime": 191.4083,
226
+ "eval_samples_per_second": 36.686,
227
+ "eval_steps_per_second": 0.575,
228
+ "eval_wer": 0.4070875244156845,
229
  "step": 3800
230
  },
231
  {
232
+ "epoch": 1.2622278321236984,
233
+ "grad_norm": 1.4278947114944458,
234
+ "learning_rate": 0.00011183333333333332,
235
+ "loss": 0.5103,
236
  "step": 4000
237
  },
238
  {
239
+ "epoch": 1.2622278321236984,
240
+ "eval_loss": 0.5384453535079956,
241
+ "eval_runtime": 195.1408,
242
+ "eval_samples_per_second": 35.984,
243
+ "eval_steps_per_second": 0.564,
244
+ "eval_wer": 0.4025033550804555,
245
  "step": 4000
246
  },
247
  {
248
+ "epoch": 1.325339223729883,
249
+ "eval_loss": 0.531086266040802,
250
+ "eval_runtime": 192.0473,
251
+ "eval_samples_per_second": 36.564,
252
+ "eval_steps_per_second": 0.573,
253
+ "eval_wer": 0.39976614092666657,
254
  "step": 4200
255
  },
256
  {
257
+ "epoch": 1.388450615336068,
258
+ "eval_loss": 0.5395579934120178,
259
+ "eval_runtime": 191.3816,
260
+ "eval_samples_per_second": 36.691,
261
+ "eval_steps_per_second": 0.575,
262
+ "eval_wer": 0.4039383993940924,
263
  "step": 4400
264
  },
265
  {
266
+ "epoch": 1.4200063111391605,
267
+ "grad_norm": 2.4821906089782715,
268
+ "learning_rate": 8.411111111111111e-05,
269
+ "loss": 0.5194,
270
  "step": 4500
271
  },
272
  {
273
+ "epoch": 1.451562006942253,
274
+ "eval_loss": 0.5501742959022522,
275
+ "eval_runtime": 190.6511,
276
+ "eval_samples_per_second": 36.832,
277
+ "eval_steps_per_second": 0.577,
278
+ "eval_wer": 0.4048818081558352,
279
  "step": 4600
280
  },
281
  {
282
+ "epoch": 1.514673398548438,
283
+ "eval_loss": 0.6632032990455627,
284
+ "eval_runtime": 193.4271,
285
+ "eval_samples_per_second": 36.303,
286
+ "eval_steps_per_second": 0.569,
287
+ "eval_wer": 0.4365059328452411,
288
  "step": 4800
289
  },
290
  {
291
+ "epoch": 1.577784790154623,
292
+ "grad_norm": 11.192009925842285,
293
+ "learning_rate": 5.649999999999999e-05,
294
+ "loss": 0.6034,
295
  "step": 5000
296
  },
297
  {
298
+ "epoch": 1.577784790154623,
299
+ "eval_loss": 0.7074605226516724,
300
+ "eval_runtime": 191.8555,
301
+ "eval_samples_per_second": 36.6,
302
+ "eval_steps_per_second": 0.573,
303
+ "eval_wer": 0.4368115441342564,
304
  "step": 5000
305
  },
306
  {
307
+ "epoch": 1.6408961817608079,
308
+ "eval_loss": 0.7465850710868835,
309
+ "eval_runtime": 191.8575,
310
+ "eval_samples_per_second": 36.6,
311
+ "eval_steps_per_second": 0.573,
312
+ "eval_wer": 0.44187406157403103,
313
  "step": 5200
314
  },
315
  {
316
+ "epoch": 1.7040075733669928,
317
+ "eval_loss": 0.7624653577804565,
318
+ "eval_runtime": 189.4967,
319
+ "eval_samples_per_second": 37.056,
320
+ "eval_steps_per_second": 0.58,
321
+ "eval_wer": 0.449819955088428,
322
  "step": 5400
323
  },
324
  {
325
+ "epoch": 1.7355632691700853,
326
+ "grad_norm": 17.51552963256836,
327
+ "learning_rate": 2.8777777777777776e-05,
328
+ "loss": 0.74,
329
  "step": 5500
330
  },
331
  {
332
+ "epoch": 1.7671189649731778,
333
+ "eval_loss": 0.7502115368843079,
334
+ "eval_runtime": 192.6564,
335
+ "eval_samples_per_second": 36.448,
336
+ "eval_steps_per_second": 0.571,
337
+ "eval_wer": 0.4474813643550937,
338
  "step": 5600
339
  },
340
  {
341
+ "epoch": 1.8302303565793627,
342
+ "eval_loss": 0.7739897966384888,
343
+ "eval_runtime": 189.3036,
344
+ "eval_samples_per_second": 37.094,
345
+ "eval_steps_per_second": 0.581,
346
+ "eval_wer": 0.45959951633691654,
347
  "step": 5800
348
  },
349
  {
350
+ "epoch": 1.8933417481855475,
351
+ "grad_norm": 10.696316719055176,
352
+ "learning_rate": 1e-06,
353
+ "loss": 0.7844,
354
  "step": 6000
355
  },
356
  {
357
+ "epoch": 1.8933417481855475,
358
+ "eval_loss": 0.7821305990219116,
359
+ "eval_runtime": 189.7595,
360
+ "eval_samples_per_second": 37.005,
361
+ "eval_steps_per_second": 0.58,
362
+ "eval_wer": 0.45755324944524906,
363
  "step": 6000
364
  },
365
  {
366
+ "epoch": 1.8933417481855475,
367
  "step": 6000,
368
+ "total_flos": 1.3297699695003722e+19,
369
+ "train_loss": 0.9828314208984374,
370
+ "train_runtime": 9865.427,
371
+ "train_samples_per_second": 9.731,
372
+ "train_steps_per_second": 0.608
373
  }
374
  ],
375
  "logging_steps": 500,
376
  "max_steps": 6000,
377
  "num_input_tokens_seen": 0,
378
+ "num_train_epochs": 2,
379
  "save_steps": 500,
380
  "stateful_callbacks": {
381
  "TrainerControl": {
 
389
  "attributes": {}
390
  }
391
  },
392
+ "total_flos": 1.3297699695003722e+19,
393
  "train_batch_size": 16,
394
  "trial_name": null,
395
  "trial_params": null