DewiBrynJones commited on
Commit
1a99499
1 Parent(s): 66deead

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: nan
21
  - Wer: 1.0
 
2
  license: apache-2.0
3
  base_model: facebook/wav2vec2-large-xlsr-53
4
  tags:
5
+ - automatic-speech-recognition
6
+ - DewiBrynJones/banc-trawsgrifiadau-bangor-clean-with-ccv
7
  - generated_from_trainer
8
  metrics:
9
  - wer
 
17
 
18
  # wav2vec2-xlsr-53-ft-btb-ccv-cy
19
 
20
+ This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the DEWIBRYNJONES/BANC-TRAWSGRIFIADAU-BANGOR-CLEAN-WITH-CCV - DEFAULT dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: nan
23
  - Wer: 1.0
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 0.6412722842118763,
3
- "eval_loss": 0.5226185917854309,
4
- "eval_runtime": 188.9199,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.169,
7
- "eval_steps_per_second": 0.582,
8
- "eval_wer": 0.40009832711037885,
9
- "total_flos": 1.1393778193380235e+19,
10
- "train_loss": 0.7283544036865235,
11
- "train_runtime": 7737.7643,
12
  "train_samples": 124748,
13
- "train_samples_per_second": 10.339,
14
- "train_steps_per_second": 1.292
15
  }
 
1
  {
2
+ "epoch": 1.2825445684237526,
3
+ "eval_loss": NaN,
4
+ "eval_runtime": 187.7207,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.407,
7
+ "eval_steps_per_second": 0.586,
8
+ "eval_wer": 1.0,
9
+ "total_flos": 2.2824984432894013e+19,
10
+ "train_loss": 0.38660173568725587,
11
+ "train_runtime": 15166.2226,
12
  "train_samples": 124748,
13
+ "train_samples_per_second": 10.55,
14
+ "train_steps_per_second": 1.319
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.6412722842118763,
3
- "eval_loss": 0.5226185917854309,
4
- "eval_runtime": 188.9199,
5
  "eval_samples": 7022,
6
- "eval_samples_per_second": 37.169,
7
- "eval_steps_per_second": 0.582,
8
- "eval_wer": 0.40009832711037885
9
  }
 
1
  {
2
+ "epoch": 1.2825445684237526,
3
+ "eval_loss": NaN,
4
+ "eval_runtime": 187.7207,
5
  "eval_samples": 7022,
6
+ "eval_samples_per_second": 37.407,
7
+ "eval_steps_per_second": 0.586,
8
+ "eval_wer": 1.0
9
  }
runs/Aug31_06-46-04_4b35055fdbcb/events.out.tfevents.1725100944.4b35055fdbcb.851.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:376693a21f28e333a9b61edf0047d2850c4860dc71aef350782130ce733d6198
3
+ size 412
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.6412722842118763,
3
- "total_flos": 1.1393778193380235e+19,
4
- "train_loss": 0.7283544036865235,
5
- "train_runtime": 7737.7643,
6
  "train_samples": 124748,
7
- "train_samples_per_second": 10.339,
8
- "train_steps_per_second": 1.292
9
  }
 
1
  {
2
+ "epoch": 1.2825445684237526,
3
+ "total_flos": 2.2824984432894013e+19,
4
+ "train_loss": 0.38660173568725587,
5
+ "train_runtime": 15166.2226,
6
  "train_samples": 124748,
7
+ "train_samples_per_second": 10.55,
8
+ "train_steps_per_second": 1.319
9
  }
trainer_state.json CHANGED
@@ -1,347 +1,667 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6412722842118763,
5
  "eval_steps": 500,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
- "grad_norm": 11.533838272094727,
14
  "learning_rate": 0.0002465,
15
- "loss": 4.6618,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
- "eval_loss": 1.5996026992797852,
21
- "eval_runtime": 185.1067,
22
- "eval_samples_per_second": 37.935,
23
- "eval_steps_per_second": 0.594,
24
- "eval_wer": 0.9161163448889834,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
- "grad_norm": 4.801280975341797,
30
- "learning_rate": 0.0002874574468085106,
31
- "loss": 1.0278,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
- "eval_loss": 1.1463252305984497,
37
- "eval_runtime": 184.8935,
38
- "eval_samples_per_second": 37.979,
39
- "eval_steps_per_second": 0.595,
40
- "eval_wer": 0.7792157748574922,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
- "grad_norm": 4.746730327606201,
46
- "learning_rate": 0.0002715,
47
- "loss": 0.8164,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
- "eval_loss": 1.0590689182281494,
53
- "eval_runtime": 185.3865,
54
- "eval_samples_per_second": 37.878,
55
- "eval_steps_per_second": 0.593,
56
- "eval_wer": 0.7363238948165668,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
- "grad_norm": 21.847057342529297,
62
- "learning_rate": 0.00025554255319148935,
63
- "loss": 0.7124,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
- "eval_loss": 0.9373884797096252,
69
- "eval_runtime": 186.5741,
70
- "eval_samples_per_second": 37.637,
71
- "eval_steps_per_second": 0.59,
72
- "eval_wer": 0.6622596632960842,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
- "grad_norm": 4.238761901855469,
78
- "learning_rate": 0.0002395851063829787,
79
- "loss": 0.6566,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
- "eval_loss": 0.8721805810928345,
85
- "eval_runtime": 187.0392,
86
- "eval_samples_per_second": 37.543,
87
- "eval_steps_per_second": 0.588,
88
- "eval_wer": 0.615168949893036,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
- "grad_norm": 5.65878963470459,
94
- "learning_rate": 0.00022362765957446805,
95
- "loss": 0.6101,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
- "eval_loss": 0.8194323182106018,
101
- "eval_runtime": 187.1888,
102
- "eval_samples_per_second": 37.513,
103
- "eval_steps_per_second": 0.588,
104
- "eval_wer": 0.5927264513214366,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
- "grad_norm": 6.294471740722656,
110
- "learning_rate": 0.00020767021276595744,
111
- "loss": 0.5777,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
- "eval_loss": 0.7799355387687683,
117
- "eval_runtime": 187.6349,
118
- "eval_samples_per_second": 37.424,
119
- "eval_steps_per_second": 0.586,
120
- "eval_wer": 0.5707490134070343,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
- "grad_norm": 3.848400115966797,
126
- "learning_rate": 0.0001917446808510638,
127
- "loss": 0.5431,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
- "eval_loss": 0.7453157901763916,
133
- "eval_runtime": 187.5991,
134
- "eval_samples_per_second": 37.431,
135
- "eval_steps_per_second": 0.586,
136
- "eval_wer": 0.550233194700966,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
- "grad_norm": 9.198270797729492,
142
- "learning_rate": 0.00017585106382978722,
143
- "loss": 0.512,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
- "eval_loss": 0.7209311127662659,
149
- "eval_runtime": 188.2502,
150
- "eval_samples_per_second": 37.301,
151
- "eval_steps_per_second": 0.584,
152
- "eval_wer": 0.5346735938558843,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
- "grad_norm": 7.854990482330322,
158
- "learning_rate": 0.00015989361702127658,
159
- "loss": 0.4953,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
- "eval_loss": 0.6942155957221985,
165
- "eval_runtime": 187.3133,
166
- "eval_samples_per_second": 37.488,
167
- "eval_steps_per_second": 0.587,
168
- "eval_wer": 0.5225022920846676,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
- "grad_norm": 10.654298782348633,
174
- "learning_rate": 0.00014393617021276595,
175
- "loss": 0.4746,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
- "eval_loss": 0.6680377721786499,
181
- "eval_runtime": 188.6546,
182
- "eval_samples_per_second": 37.221,
183
- "eval_steps_per_second": 0.583,
184
- "eval_wer": 0.495714798230112,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
- "grad_norm": 19.936861038208008,
190
- "learning_rate": 0.00012801063829787234,
191
- "loss": 0.4535,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
- "eval_loss": 0.6434958577156067,
197
- "eval_runtime": 188.0277,
198
- "eval_samples_per_second": 37.346,
199
- "eval_steps_per_second": 0.585,
200
- "eval_wer": 0.47308627539563375,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
- "grad_norm": 6.950649261474609,
206
- "learning_rate": 0.0001120531914893617,
207
- "loss": 0.4249,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
- "eval_loss": 0.6293027400970459,
213
- "eval_runtime": 187.8772,
214
- "eval_samples_per_second": 37.375,
215
- "eval_steps_per_second": 0.585,
216
- "eval_wer": 0.4816832538301067,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
- "grad_norm": 5.574142932891846,
222
- "learning_rate": 9.612765957446806e-05,
223
- "loss": 0.4065,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
- "eval_loss": 0.5997486710548401,
229
- "eval_runtime": 188.7633,
230
- "eval_samples_per_second": 37.2,
231
- "eval_steps_per_second": 0.583,
232
- "eval_wer": 0.44947448145736724,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
- "grad_norm": 5.602737903594971,
238
- "learning_rate": 8.017021276595744e-05,
239
- "loss": 0.393,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
- "eval_loss": 0.5802670121192932,
245
- "eval_runtime": 189.0166,
246
- "eval_samples_per_second": 37.15,
247
- "eval_steps_per_second": 0.582,
248
- "eval_wer": 0.44260487117819797,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
- "grad_norm": 15.019088745117188,
254
- "learning_rate": 6.424468085106383e-05,
255
- "loss": 0.3808,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
- "eval_loss": 0.5661880970001221,
261
- "eval_runtime": 189.1322,
262
- "eval_samples_per_second": 37.127,
263
- "eval_steps_per_second": 0.582,
264
- "eval_wer": 0.4299153589603901,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
- "grad_norm": 3.79109525680542,
270
- "learning_rate": 4.8287234042553194e-05,
271
- "loss": 0.3722,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
- "eval_loss": 0.553141176700592,
277
- "eval_runtime": 188.8506,
278
- "eval_samples_per_second": 37.183,
279
- "eval_steps_per_second": 0.582,
280
- "eval_wer": 0.4194182755550831,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
- "grad_norm": 12.16059398651123,
286
- "learning_rate": 3.232978723404255e-05,
287
- "loss": 0.3622,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
- "eval_loss": 0.5399113297462463,
293
- "eval_runtime": 188.6804,
294
- "eval_samples_per_second": 37.216,
295
- "eval_steps_per_second": 0.583,
296
- "eval_wer": 0.40727354867856336,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
- "grad_norm": 7.863190650939941,
302
- "learning_rate": 1.6372340425531912e-05,
303
- "loss": 0.3526,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
- "eval_loss": 0.5277913808822632,
309
- "eval_runtime": 188.1946,
310
- "eval_samples_per_second": 37.312,
311
- "eval_steps_per_second": 0.585,
312
- "eval_wer": 0.40279567892212226,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
- "grad_norm": 5.124056339263916,
318
- "learning_rate": 4.1489361702127654e-07,
319
- "loss": 0.3337,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
- "eval_loss": 0.5226185917854309,
325
- "eval_runtime": 188.4518,
326
- "eval_samples_per_second": 37.262,
327
- "eval_steps_per_second": 0.584,
328
- "eval_wer": 0.40009832711037885,
329
  "step": 10000
330
  },
331
  {
332
- "epoch": 0.6412722842118763,
333
- "step": 10000,
334
- "total_flos": 1.1393778193380235e+19,
335
- "train_loss": 0.7283544036865235,
336
- "train_runtime": 7737.7643,
337
- "train_samples_per_second": 10.339,
338
- "train_steps_per_second": 1.292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  }
340
  ],
341
  "logging_steps": 500,
342
- "max_steps": 10000,
343
  "num_input_tokens_seen": 0,
344
- "num_train_epochs": 1,
345
  "save_steps": 500,
346
  "stateful_callbacks": {
347
  "TrainerControl": {
@@ -355,7 +675,7 @@
355
  "attributes": {}
356
  }
357
  },
358
- "total_flos": 1.1393778193380235e+19,
359
  "train_batch_size": 8,
360
  "trial_name": null,
361
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2825445684237526,
5
  "eval_steps": 500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03206361421059382,
13
+ "grad_norm": 9.577472686767578,
14
  "learning_rate": 0.0002465,
15
+ "loss": 4.7126,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03206361421059382,
20
+ "eval_loss": 1.7046922445297241,
21
+ "eval_runtime": 189.0412,
22
+ "eval_samples_per_second": 37.145,
23
+ "eval_steps_per_second": 0.582,
24
+ "eval_wer": 0.9345593218086873,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 0.06412722842118763,
29
+ "grad_norm": 6.515851020812988,
30
+ "learning_rate": 0.0002939226804123711,
31
+ "loss": 1.0533,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 0.06412722842118763,
36
+ "eval_loss": 1.1487088203430176,
37
+ "eval_runtime": 189.0728,
38
+ "eval_samples_per_second": 37.139,
39
+ "eval_steps_per_second": 0.582,
40
+ "eval_wer": 0.7906695544718904,
41
  "step": 1000
42
  },
43
  {
44
  "epoch": 0.09619084263178146,
45
+ "grad_norm": 3.676572799682617,
46
+ "learning_rate": 0.0002861907216494845,
47
+ "loss": 0.8268,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 0.09619084263178146,
52
+ "eval_loss": 1.060188889503479,
53
+ "eval_runtime": 190.9733,
54
+ "eval_samples_per_second": 36.77,
55
+ "eval_steps_per_second": 0.576,
56
+ "eval_wer": 0.7815012158014324,
57
  "step": 1500
58
  },
59
  {
60
  "epoch": 0.12825445684237527,
61
+ "grad_norm": 9.430739402770996,
62
+ "learning_rate": 0.00027845876288659795,
63
+ "loss": 0.7188,
64
  "step": 2000
65
  },
66
  {
67
  "epoch": 0.12825445684237527,
68
+ "eval_loss": 0.9336337447166443,
69
+ "eval_runtime": 191.0896,
70
+ "eval_samples_per_second": 36.747,
71
+ "eval_steps_per_second": 0.576,
72
+ "eval_wer": 0.671746900702906,
73
  "step": 2000
74
  },
75
  {
76
  "epoch": 0.16031807105296908,
77
+ "grad_norm": 2.9828200340270996,
78
+ "learning_rate": 0.00027072680412371135,
79
+ "loss": 0.6725,
80
  "step": 2500
81
  },
82
  {
83
  "epoch": 0.16031807105296908,
84
+ "eval_loss": 0.9303568005561829,
85
+ "eval_runtime": 191.2157,
86
+ "eval_samples_per_second": 36.723,
87
+ "eval_steps_per_second": 0.575,
88
+ "eval_wer": 0.6560677128316879,
89
  "step": 2500
90
  },
91
  {
92
  "epoch": 0.19238168526356292,
93
+ "grad_norm": 4.710850238800049,
94
+ "learning_rate": 0.0002629948453608247,
95
+ "loss": 0.6295,
96
  "step": 3000
97
  },
98
  {
99
  "epoch": 0.19238168526356292,
100
+ "eval_loss": 0.8600214719772339,
101
+ "eval_runtime": 191.6797,
102
+ "eval_samples_per_second": 36.634,
103
+ "eval_steps_per_second": 0.574,
104
+ "eval_wer": 0.6257324705350855,
105
  "step": 3000
106
  },
107
  {
108
  "epoch": 0.22444529947415673,
109
+ "grad_norm": 4.912868976593018,
110
+ "learning_rate": 0.0002552628865979381,
111
+ "loss": 0.6003,
112
  "step": 3500
113
  },
114
  {
115
  "epoch": 0.22444529947415673,
116
+ "eval_loss": 0.8395254611968994,
117
+ "eval_runtime": 191.3108,
118
+ "eval_samples_per_second": 36.705,
119
+ "eval_steps_per_second": 0.575,
120
+ "eval_wer": 0.6113288776093224,
121
  "step": 3500
122
  },
123
  {
124
  "epoch": 0.25650891368475054,
125
+ "grad_norm": 4.513955116271973,
126
+ "learning_rate": 0.00024754639175257734,
127
+ "loss": 0.5847,
128
  "step": 4000
129
  },
130
  {
131
  "epoch": 0.25650891368475054,
132
+ "eval_loss": 0.7883865833282471,
133
+ "eval_runtime": 192.8783,
134
+ "eval_samples_per_second": 36.406,
135
+ "eval_steps_per_second": 0.57,
136
+ "eval_wer": 0.5861491648839341,
137
  "step": 4000
138
  },
139
  {
140
  "epoch": 0.2885725278953444,
141
+ "grad_norm": 16.630624771118164,
142
+ "learning_rate": 0.00023984536082474227,
143
+ "loss": 0.5521,
144
  "step": 4500
145
  },
146
  {
147
  "epoch": 0.2885725278953444,
148
+ "eval_loss": 0.7741186618804932,
149
+ "eval_runtime": 189.6516,
150
+ "eval_samples_per_second": 37.026,
151
+ "eval_steps_per_second": 0.58,
152
+ "eval_wer": 0.5686628841733214,
153
  "step": 4500
154
  },
155
  {
156
  "epoch": 0.32063614210593816,
157
+ "grad_norm": 7.58245325088501,
158
+ "learning_rate": 0.00023211340206185567,
159
+ "loss": 0.5477,
160
  "step": 5000
161
  },
162
  {
163
  "epoch": 0.32063614210593816,
164
+ "eval_loss": 0.7594121098518372,
165
+ "eval_runtime": 190.5466,
166
+ "eval_samples_per_second": 36.852,
167
+ "eval_steps_per_second": 0.577,
168
+ "eval_wer": 0.5535550565380885,
169
  "step": 5000
170
  },
171
  {
172
  "epoch": 0.352699756316532,
173
+ "grad_norm": 5.051167011260986,
174
+ "learning_rate": 0.00022438144329896904,
175
+ "loss": 0.5346,
176
  "step": 5500
177
  },
178
  {
179
  "epoch": 0.352699756316532,
180
+ "eval_loss": 0.7481973767280579,
181
+ "eval_runtime": 190.5699,
182
+ "eval_samples_per_second": 36.847,
183
+ "eval_steps_per_second": 0.577,
184
+ "eval_wer": 0.5394039251119468,
185
  "step": 5500
186
  },
187
  {
188
  "epoch": 0.38476337052712584,
189
+ "grad_norm": 4.212076187133789,
190
+ "learning_rate": 0.00021666494845360825,
191
+ "loss": 0.5154,
192
  "step": 6000
193
  },
194
  {
195
  "epoch": 0.38476337052712584,
196
+ "eval_loss": 0.7294158935546875,
197
+ "eval_runtime": 189.7232,
198
+ "eval_samples_per_second": 37.012,
199
+ "eval_steps_per_second": 0.58,
200
+ "eval_wer": 0.53515194196043,
201
  "step": 6000
202
  },
203
  {
204
  "epoch": 0.4168269847377196,
205
+ "grad_norm": 5.682095527648926,
206
+ "learning_rate": 0.00020893298969072165,
207
+ "loss": 0.492,
208
  "step": 6500
209
  },
210
  {
211
  "epoch": 0.4168269847377196,
212
+ "eval_loss": 0.7247592806816101,
213
+ "eval_runtime": 190.6553,
214
+ "eval_samples_per_second": 36.831,
215
+ "eval_steps_per_second": 0.577,
216
+ "eval_wer": 0.5492632110445262,
217
  "step": 6500
218
  },
219
  {
220
  "epoch": 0.44889059894831346,
221
+ "grad_norm": 8.364203453063965,
222
+ "learning_rate": 0.0002012164948453608,
223
+ "loss": 0.4759,
224
  "step": 7000
225
  },
226
  {
227
  "epoch": 0.44889059894831346,
228
+ "eval_loss": 0.7076719403266907,
229
+ "eval_runtime": 189.5572,
230
+ "eval_samples_per_second": 37.044,
231
+ "eval_steps_per_second": 0.58,
232
+ "eval_wer": 0.5134402529929976,
233
  "step": 7000
234
  },
235
  {
236
  "epoch": 0.4809542131589073,
237
+ "grad_norm": 4.447290897369385,
238
+ "learning_rate": 0.0001934845360824742,
239
+ "loss": 0.4655,
240
  "step": 7500
241
  },
242
  {
243
  "epoch": 0.4809542131589073,
244
+ "eval_loss": 0.673875629901886,
245
+ "eval_runtime": 190.3324,
246
+ "eval_samples_per_second": 36.893,
247
+ "eval_steps_per_second": 0.578,
248
+ "eval_wer": 0.5063979058982979,
249
  "step": 7500
250
  },
251
  {
252
  "epoch": 0.5130178273695011,
253
+ "grad_norm": 12.618865013122559,
254
+ "learning_rate": 0.0001857680412371134,
255
+ "loss": 0.4594,
256
  "step": 8000
257
  },
258
  {
259
  "epoch": 0.5130178273695011,
260
+ "eval_loss": 0.6574720144271851,
261
+ "eval_runtime": 190.8303,
262
+ "eval_samples_per_second": 36.797,
263
+ "eval_steps_per_second": 0.576,
264
+ "eval_wer": 0.5067300920820101,
265
  "step": 8000
266
  },
267
  {
268
  "epoch": 0.5450814415800949,
269
+ "grad_norm": 2.756011962890625,
270
+ "learning_rate": 0.0001780360824742268,
271
+ "loss": 0.4538,
272
  "step": 8500
273
  },
274
  {
275
  "epoch": 0.5450814415800949,
276
+ "eval_loss": 0.6492609977722168,
277
+ "eval_runtime": 189.5472,
278
+ "eval_samples_per_second": 37.046,
279
+ "eval_steps_per_second": 0.58,
280
+ "eval_wer": 0.500325542460038,
281
  "step": 8500
282
  },
283
  {
284
  "epoch": 0.5771450557906888,
285
+ "grad_norm": 7.6861491203308105,
286
+ "learning_rate": 0.0001703041237113402,
287
+ "loss": 0.4739,
288
  "step": 9000
289
  },
290
  {
291
  "epoch": 0.5771450557906888,
292
+ "eval_loss": 0.7676782608032227,
293
+ "eval_runtime": 189.9033,
294
+ "eval_samples_per_second": 36.977,
295
+ "eval_steps_per_second": 0.579,
296
+ "eval_wer": 0.5238576117142136,
297
  "step": 9000
298
  },
299
  {
300
  "epoch": 0.6092086700012825,
301
+ "grad_norm": NaN,
302
+ "learning_rate": 0.00016787628865979378,
303
+ "loss": 0.695,
304
  "step": 9500
305
  },
306
  {
307
  "epoch": 0.6092086700012825,
308
+ "eval_loss": NaN,
309
+ "eval_runtime": 186.9158,
310
+ "eval_samples_per_second": 37.568,
311
+ "eval_steps_per_second": 0.589,
312
+ "eval_wer": 1.0,
313
  "step": 9500
314
  },
315
  {
316
  "epoch": 0.6412722842118763,
317
+ "grad_norm": NaN,
318
+ "learning_rate": 0.00016787628865979378,
319
+ "loss": 0.0,
320
  "step": 10000
321
  },
322
  {
323
  "epoch": 0.6412722842118763,
324
+ "eval_loss": NaN,
325
+ "eval_runtime": 186.7613,
326
+ "eval_samples_per_second": 37.599,
327
+ "eval_steps_per_second": 0.589,
328
+ "eval_wer": 1.0,
329
  "step": 10000
330
  },
331
  {
332
+ "epoch": 0.6733358984224702,
333
+ "grad_norm": NaN,
334
+ "learning_rate": 0.00016787628865979378,
335
+ "loss": 0.0,
336
+ "step": 10500
337
+ },
338
+ {
339
+ "epoch": 0.6733358984224702,
340
+ "eval_loss": NaN,
341
+ "eval_runtime": 185.9746,
342
+ "eval_samples_per_second": 37.758,
343
+ "eval_steps_per_second": 0.591,
344
+ "eval_wer": 1.0,
345
+ "step": 10500
346
+ },
347
+ {
348
+ "epoch": 0.705399512633064,
349
+ "grad_norm": NaN,
350
+ "learning_rate": 0.00016787628865979378,
351
+ "loss": 0.0,
352
+ "step": 11000
353
+ },
354
+ {
355
+ "epoch": 0.705399512633064,
356
+ "eval_loss": NaN,
357
+ "eval_runtime": 186.0588,
358
+ "eval_samples_per_second": 37.741,
359
+ "eval_steps_per_second": 0.591,
360
+ "eval_wer": 1.0,
361
+ "step": 11000
362
+ },
363
+ {
364
+ "epoch": 0.7374631268436578,
365
+ "grad_norm": NaN,
366
+ "learning_rate": 0.00016787628865979378,
367
+ "loss": 0.0,
368
+ "step": 11500
369
+ },
370
+ {
371
+ "epoch": 0.7374631268436578,
372
+ "eval_loss": NaN,
373
+ "eval_runtime": 187.0825,
374
+ "eval_samples_per_second": 37.534,
375
+ "eval_steps_per_second": 0.588,
376
+ "eval_wer": 1.0,
377
+ "step": 11500
378
+ },
379
+ {
380
+ "epoch": 0.7695267410542517,
381
+ "grad_norm": NaN,
382
+ "learning_rate": 0.00016787628865979378,
383
+ "loss": 0.0,
384
+ "step": 12000
385
+ },
386
+ {
387
+ "epoch": 0.7695267410542517,
388
+ "eval_loss": NaN,
389
+ "eval_runtime": 186.5183,
390
+ "eval_samples_per_second": 37.648,
391
+ "eval_steps_per_second": 0.59,
392
+ "eval_wer": 1.0,
393
+ "step": 12000
394
+ },
395
+ {
396
+ "epoch": 0.8015903552648455,
397
+ "grad_norm": NaN,
398
+ "learning_rate": 0.00016787628865979378,
399
+ "loss": 0.0,
400
+ "step": 12500
401
+ },
402
+ {
403
+ "epoch": 0.8015903552648455,
404
+ "eval_loss": NaN,
405
+ "eval_runtime": 186.3281,
406
+ "eval_samples_per_second": 37.686,
407
+ "eval_steps_per_second": 0.59,
408
+ "eval_wer": 1.0,
409
+ "step": 12500
410
+ },
411
+ {
412
+ "epoch": 0.8336539694754392,
413
+ "grad_norm": NaN,
414
+ "learning_rate": 0.00016787628865979378,
415
+ "loss": 0.0,
416
+ "step": 13000
417
+ },
418
+ {
419
+ "epoch": 0.8336539694754392,
420
+ "eval_loss": NaN,
421
+ "eval_runtime": 185.5922,
422
+ "eval_samples_per_second": 37.836,
423
+ "eval_steps_per_second": 0.593,
424
+ "eval_wer": 1.0,
425
+ "step": 13000
426
+ },
427
+ {
428
+ "epoch": 0.8657175836860331,
429
+ "grad_norm": NaN,
430
+ "learning_rate": 0.00016787628865979378,
431
+ "loss": 0.0,
432
+ "step": 13500
433
+ },
434
+ {
435
+ "epoch": 0.8657175836860331,
436
+ "eval_loss": NaN,
437
+ "eval_runtime": 185.7237,
438
+ "eval_samples_per_second": 37.809,
439
+ "eval_steps_per_second": 0.592,
440
+ "eval_wer": 1.0,
441
+ "step": 13500
442
+ },
443
+ {
444
+ "epoch": 0.8977811978966269,
445
+ "grad_norm": NaN,
446
+ "learning_rate": 0.00016787628865979378,
447
+ "loss": 0.0,
448
+ "step": 14000
449
+ },
450
+ {
451
+ "epoch": 0.8977811978966269,
452
+ "eval_loss": NaN,
453
+ "eval_runtime": 186.6259,
454
+ "eval_samples_per_second": 37.626,
455
+ "eval_steps_per_second": 0.589,
456
+ "eval_wer": 1.0,
457
+ "step": 14000
458
+ },
459
+ {
460
+ "epoch": 0.9298448121072207,
461
+ "grad_norm": NaN,
462
+ "learning_rate": 0.00016787628865979378,
463
+ "loss": 0.0,
464
+ "step": 14500
465
+ },
466
+ {
467
+ "epoch": 0.9298448121072207,
468
+ "eval_loss": NaN,
469
+ "eval_runtime": 186.1517,
470
+ "eval_samples_per_second": 37.722,
471
+ "eval_steps_per_second": 0.591,
472
+ "eval_wer": 1.0,
473
+ "step": 14500
474
+ },
475
+ {
476
+ "epoch": 0.9619084263178146,
477
+ "grad_norm": NaN,
478
+ "learning_rate": 0.00016787628865979378,
479
+ "loss": 0.0,
480
+ "step": 15000
481
+ },
482
+ {
483
+ "epoch": 0.9619084263178146,
484
+ "eval_loss": NaN,
485
+ "eval_runtime": 186.7927,
486
+ "eval_samples_per_second": 37.592,
487
+ "eval_steps_per_second": 0.589,
488
+ "eval_wer": 1.0,
489
+ "step": 15000
490
+ },
491
+ {
492
+ "epoch": 0.9939720405284084,
493
+ "grad_norm": NaN,
494
+ "learning_rate": 0.00016787628865979378,
495
+ "loss": 0.0,
496
+ "step": 15500
497
+ },
498
+ {
499
+ "epoch": 0.9939720405284084,
500
+ "eval_loss": NaN,
501
+ "eval_runtime": 186.1708,
502
+ "eval_samples_per_second": 37.718,
503
+ "eval_steps_per_second": 0.591,
504
+ "eval_wer": 1.0,
505
+ "step": 15500
506
+ },
507
+ {
508
+ "epoch": 1.0260356547390022,
509
+ "grad_norm": NaN,
510
+ "learning_rate": 0.00016787628865979378,
511
+ "loss": 0.0,
512
+ "step": 16000
513
+ },
514
+ {
515
+ "epoch": 1.0260356547390022,
516
+ "eval_loss": NaN,
517
+ "eval_runtime": 186.1341,
518
+ "eval_samples_per_second": 37.725,
519
+ "eval_steps_per_second": 0.591,
520
+ "eval_wer": 1.0,
521
+ "step": 16000
522
+ },
523
+ {
524
+ "epoch": 1.058099268949596,
525
+ "grad_norm": NaN,
526
+ "learning_rate": 0.00016787628865979378,
527
+ "loss": 0.0,
528
+ "step": 16500
529
+ },
530
+ {
531
+ "epoch": 1.058099268949596,
532
+ "eval_loss": NaN,
533
+ "eval_runtime": 186.4575,
534
+ "eval_samples_per_second": 37.66,
535
+ "eval_steps_per_second": 0.59,
536
+ "eval_wer": 1.0,
537
+ "step": 16500
538
+ },
539
+ {
540
+ "epoch": 1.0901628831601897,
541
+ "grad_norm": NaN,
542
+ "learning_rate": 0.00016787628865979378,
543
+ "loss": 0.0,
544
+ "step": 17000
545
+ },
546
+ {
547
+ "epoch": 1.0901628831601897,
548
+ "eval_loss": NaN,
549
+ "eval_runtime": 185.4444,
550
+ "eval_samples_per_second": 37.866,
551
+ "eval_steps_per_second": 0.593,
552
+ "eval_wer": 1.0,
553
+ "step": 17000
554
+ },
555
+ {
556
+ "epoch": 1.1222264973707836,
557
+ "grad_norm": NaN,
558
+ "learning_rate": 0.00016787628865979378,
559
+ "loss": 0.0,
560
+ "step": 17500
561
+ },
562
+ {
563
+ "epoch": 1.1222264973707836,
564
+ "eval_loss": NaN,
565
+ "eval_runtime": 186.15,
566
+ "eval_samples_per_second": 37.722,
567
+ "eval_steps_per_second": 0.591,
568
+ "eval_wer": 1.0,
569
+ "step": 17500
570
+ },
571
+ {
572
+ "epoch": 1.1542901115813775,
573
+ "grad_norm": NaN,
574
+ "learning_rate": 0.00016787628865979378,
575
+ "loss": 0.0,
576
+ "step": 18000
577
+ },
578
+ {
579
+ "epoch": 1.1542901115813775,
580
+ "eval_loss": NaN,
581
+ "eval_runtime": 186.0027,
582
+ "eval_samples_per_second": 37.752,
583
+ "eval_steps_per_second": 0.591,
584
+ "eval_wer": 1.0,
585
+ "step": 18000
586
+ },
587
+ {
588
+ "epoch": 1.1863537257919712,
589
+ "grad_norm": NaN,
590
+ "learning_rate": 0.00016787628865979378,
591
+ "loss": 0.0,
592
+ "step": 18500
593
+ },
594
+ {
595
+ "epoch": 1.1863537257919712,
596
+ "eval_loss": NaN,
597
+ "eval_runtime": 185.6149,
598
+ "eval_samples_per_second": 37.831,
599
+ "eval_steps_per_second": 0.593,
600
+ "eval_wer": 1.0,
601
+ "step": 18500
602
+ },
603
+ {
604
+ "epoch": 1.218417340002565,
605
+ "grad_norm": NaN,
606
+ "learning_rate": 0.00016787628865979378,
607
+ "loss": 0.0,
608
+ "step": 19000
609
+ },
610
+ {
611
+ "epoch": 1.218417340002565,
612
+ "eval_loss": NaN,
613
+ "eval_runtime": 186.7557,
614
+ "eval_samples_per_second": 37.6,
615
+ "eval_steps_per_second": 0.589,
616
+ "eval_wer": 1.0,
617
+ "step": 19000
618
+ },
619
+ {
620
+ "epoch": 1.250480954213159,
621
+ "grad_norm": NaN,
622
+ "learning_rate": 0.00016787628865979378,
623
+ "loss": 0.0,
624
+ "step": 19500
625
+ },
626
+ {
627
+ "epoch": 1.250480954213159,
628
+ "eval_loss": NaN,
629
+ "eval_runtime": 186.7166,
630
+ "eval_samples_per_second": 37.608,
631
+ "eval_steps_per_second": 0.589,
632
+ "eval_wer": 1.0,
633
+ "step": 19500
634
+ },
635
+ {
636
+ "epoch": 1.2825445684237526,
637
+ "grad_norm": NaN,
638
+ "learning_rate": 0.00016787628865979378,
639
+ "loss": 0.0,
640
+ "step": 20000
641
+ },
642
+ {
643
+ "epoch": 1.2825445684237526,
644
+ "eval_loss": NaN,
645
+ "eval_runtime": 186.0546,
646
+ "eval_samples_per_second": 37.742,
647
+ "eval_steps_per_second": 0.591,
648
+ "eval_wer": 1.0,
649
+ "step": 20000
650
+ },
651
+ {
652
+ "epoch": 1.2825445684237526,
653
+ "step": 20000,
654
+ "total_flos": 2.2824984432894013e+19,
655
+ "train_loss": 0.38660173568725587,
656
+ "train_runtime": 15166.2226,
657
+ "train_samples_per_second": 10.55,
658
+ "train_steps_per_second": 1.319
659
  }
660
  ],
661
  "logging_steps": 500,
662
+ "max_steps": 20000,
663
  "num_input_tokens_seen": 0,
664
+ "num_train_epochs": 2,
665
  "save_steps": 500,
666
  "stateful_callbacks": {
667
  "TrainerControl": {
 
675
  "attributes": {}
676
  }
677
  },
678
+ "total_flos": 2.2824984432894013e+19,
679
  "train_batch_size": 8,
680
  "trial_name": null,
681
  "trial_params": null