Roy039 commited on
Commit
cdc4f22
1 Parent(s): 6712dce

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
  - fleurs
@@ -15,7 +16,7 @@ model-index:
15
  name: Audio Classification
16
  type: audio-classification
17
  dataset:
18
- name: fleurs
19
  type: fleurs
20
  config: bn_in
21
  split: validation
@@ -31,9 +32,9 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  # wav2vec2-base-lang-id
33
 
34
- This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the fleurs dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.0000
37
  - Accuracy: 1.0
38
 
39
  ## Model description
 
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base
5
  tags:
6
+ - audio-classification
7
  - generated_from_trainer
8
  datasets:
9
  - fleurs
 
16
  name: Audio Classification
17
  type: audio-classification
18
  dataset:
19
+ name: google/fleurs
20
  type: fleurs
21
  config: bn_in
22
  split: validation
 
32
 
33
  # wav2vec2-base-lang-id
34
 
35
+ This model is a fine-tuned version of [facebook/wav2vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the google/fleurs dataset.
36
  It achieves the following results on the evaluation set:
37
+ - Loss: 0.0001
38
  - Accuracy: 1.0
39
 
40
  ## Model description
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 9.962264150943396,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 6.413459777832031e-05,
5
- "eval_runtime": 7.1664,
6
- "eval_samples_per_second": 33.35,
7
- "eval_steps_per_second": 33.35,
8
- "total_flos": 2.937323868434565e+18,
9
- "train_loss": 0.1621207386876146,
10
- "train_runtime": 757.3573,
11
- "train_samples_per_second": 27.992,
12
- "train_steps_per_second": 0.871
13
  }
 
1
  {
2
+ "epoch": 10.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 5.837103526573628e-05,
5
+ "eval_runtime": 9.8113,
6
+ "eval_samples_per_second": 40.973,
7
+ "eval_steps_per_second": 40.973,
8
+ "total_flos": 4.315241031363276e+18,
9
+ "train_loss": 0.13065080859353445,
10
+ "train_runtime": 1086.1024,
11
+ "train_samples_per_second": 27.677,
12
+ "train_steps_per_second": 0.865
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.962264150943396,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 6.413459777832031e-05,
5
- "eval_runtime": 7.1664,
6
- "eval_samples_per_second": 33.35,
7
- "eval_steps_per_second": 33.35
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 5.837103526573628e-05,
5
+ "eval_runtime": 9.8113,
6
+ "eval_samples_per_second": 40.973,
7
+ "eval_steps_per_second": 40.973
8
  }
runs/Sep18_12-52-12_user/events.out.tfevents.1726645427.user.2655605.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b22c9014b7f71890514c01e5ca7a5172f2eb8d2d9d0bb88d8a9fa1faaec2d7
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.962264150943396,
3
- "total_flos": 2.937323868434565e+18,
4
- "train_loss": 0.1621207386876146,
5
- "train_runtime": 757.3573,
6
- "train_samples_per_second": 27.992,
7
- "train_steps_per_second": 0.871
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 4.315241031363276e+18,
4
+ "train_loss": 0.13065080859353445,
5
+ "train_runtime": 1086.1024,
6
+ "train_samples_per_second": 27.677,
7
+ "train_steps_per_second": 0.865
8
  }
trainer_state.json CHANGED
@@ -1,577 +1,773 @@
1
  {
2
  "best_metric": 1.0,
3
- "best_model_checkpoint": "wav2vec2-base-lang-id/checkpoint-66",
4
- "epoch": 9.962264150943396,
5
  "eval_steps": 500,
6
- "global_step": 660,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.1509433962264151,
13
- "grad_norm": 4.237973690032959,
14
- "learning_rate": 4.545454545454545e-05,
15
- "loss": 4.3975,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.3018867924528302,
20
- "grad_norm": 7.205986499786377,
21
- "learning_rate": 9.09090909090909e-05,
22
- "loss": 3.27,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.4528301886792453,
27
- "grad_norm": 8.798068046569824,
28
- "learning_rate": 0.00013636363636363634,
29
- "loss": 2.1171,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.6037735849056604,
34
- "grad_norm": 4.440766334533691,
35
- "learning_rate": 0.0001818181818181818,
36
- "loss": 0.8282,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.7547169811320755,
41
- "grad_norm": 0.20978114008903503,
42
- "learning_rate": 0.00022727272727272725,
43
- "loss": 0.0841,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.9056603773584906,
48
- "grad_norm": 0.009890351444482803,
49
- "learning_rate": 0.0002727272727272727,
50
- "loss": 0.002,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.9962264150943396,
55
- "eval_accuracy": 1.0,
56
- "eval_loss": 6.413459777832031e-05,
57
- "eval_runtime": 7.9133,
58
- "eval_samples_per_second": 30.202,
59
- "eval_steps_per_second": 30.202,
60
- "step": 66
61
- },
62
- {
63
- "epoch": 1.0566037735849056,
64
- "grad_norm": 0.0018947708886116743,
65
- "learning_rate": 0.00029797979797979794,
66
- "loss": 0.0001,
67
  "step": 70
68
  },
69
  {
70
- "epoch": 1.2075471698113207,
71
- "grad_norm": 0.0009346603183075786,
72
- "learning_rate": 0.0002929292929292929,
73
- "loss": 0.0,
74
  "step": 80
75
  },
76
  {
77
- "epoch": 1.3584905660377358,
78
- "grad_norm": 0.0007128348806872964,
79
- "learning_rate": 0.00028787878787878786,
80
- "loss": 0.0,
81
  "step": 90
82
  },
83
  {
84
- "epoch": 1.509433962264151,
85
- "grad_norm": 0.0006472347886301577,
86
- "learning_rate": 0.0002828282828282828,
87
- "loss": 0.0,
 
 
 
 
 
 
 
 
 
88
  "step": 100
89
  },
90
  {
91
- "epoch": 1.6603773584905661,
92
- "grad_norm": 0.0006166133098304272,
93
- "learning_rate": 0.0002777777777777778,
94
- "loss": 0.0,
95
  "step": 110
96
  },
97
  {
98
- "epoch": 1.8113207547169812,
99
- "grad_norm": 0.0006039089639671147,
100
- "learning_rate": 0.0002727272727272727,
101
- "loss": 0.0,
102
  "step": 120
103
  },
104
  {
105
- "epoch": 1.9622641509433962,
106
- "grad_norm": 0.000593557779211551,
107
- "learning_rate": 0.00026767676767676764,
108
- "loss": 0.0,
109
  "step": 130
110
  },
111
  {
112
- "epoch": 1.9924528301886792,
113
- "eval_accuracy": 1.0,
114
- "eval_loss": 1.2636184692382812e-05,
115
- "eval_runtime": 7.1786,
116
- "eval_samples_per_second": 33.293,
117
- "eval_steps_per_second": 33.293,
118
- "step": 132
119
- },
120
- {
121
- "epoch": 2.1132075471698113,
122
- "grad_norm": 0.0005885771824978292,
123
- "learning_rate": 0.0002626262626262626,
124
- "loss": 0.0,
125
  "step": 140
126
  },
127
  {
128
- "epoch": 2.2641509433962264,
129
- "grad_norm": 0.0005855397321283817,
130
- "learning_rate": 0.00025757575757575756,
131
- "loss": 0.0,
132
  "step": 150
133
  },
134
  {
135
- "epoch": 2.4150943396226414,
136
- "grad_norm": 0.000579712213948369,
137
- "learning_rate": 0.0002525252525252525,
138
- "loss": 0.0,
139
  "step": 160
140
  },
141
  {
142
- "epoch": 2.5660377358490565,
143
- "grad_norm": 0.0005738649051636457,
144
- "learning_rate": 0.0002474747474747475,
145
- "loss": 0.0,
146
  "step": 170
147
  },
148
  {
149
- "epoch": 2.7169811320754715,
150
- "grad_norm": 0.0005652164691127837,
151
- "learning_rate": 0.0002424242424242424,
152
- "loss": 0.0,
153
  "step": 180
154
  },
155
  {
156
- "epoch": 2.867924528301887,
157
- "grad_norm": 0.0005594562389887869,
158
- "learning_rate": 0.00023737373737373732,
159
- "loss": 0.0,
160
- "step": 190
 
 
161
  },
162
  {
163
- "epoch": 2.988679245283019,
164
- "eval_accuracy": 1.0,
165
- "eval_loss": 1.1682510375976562e-05,
166
- "eval_runtime": 7.0786,
167
- "eval_samples_per_second": 33.764,
168
- "eval_steps_per_second": 33.764,
169
- "step": 198
170
  },
171
  {
172
- "epoch": 3.018867924528302,
173
- "grad_norm": 0.0005571797373704612,
174
- "learning_rate": 0.0002323232323232323,
175
- "loss": 0.0,
176
  "step": 200
177
  },
178
  {
179
- "epoch": 3.169811320754717,
180
- "grad_norm": 0.0005533109069801867,
181
- "learning_rate": 0.00022727272727272725,
182
  "loss": 0.0,
183
  "step": 210
184
  },
185
  {
186
- "epoch": 3.3207547169811322,
187
- "grad_norm": 0.0005470027681440115,
188
- "learning_rate": 0.00022222222222222218,
189
  "loss": 0.0,
190
  "step": 220
191
  },
192
  {
193
- "epoch": 3.4716981132075473,
194
- "grad_norm": 0.0005392919410951436,
195
- "learning_rate": 0.00021717171717171717,
196
  "loss": 0.0,
197
  "step": 230
198
  },
199
  {
200
- "epoch": 3.6226415094339623,
201
- "grad_norm": 0.0005366892437450588,
202
- "learning_rate": 0.0002121212121212121,
203
  "loss": 0.0,
204
  "step": 240
205
  },
206
  {
207
- "epoch": 3.7735849056603774,
208
- "grad_norm": 0.0005334424204193056,
209
- "learning_rate": 0.00020707070707070703,
210
  "loss": 0.0,
211
  "step": 250
212
  },
213
  {
214
- "epoch": 3.9245283018867925,
215
- "grad_norm": 0.000528818869497627,
216
- "learning_rate": 0.00020202020202020202,
217
  "loss": 0.0,
218
  "step": 260
219
  },
220
  {
221
- "epoch": 4.0,
222
- "eval_accuracy": 1.0,
223
- "eval_loss": 1.0967254638671875e-05,
224
- "eval_runtime": 7.0836,
225
- "eval_samples_per_second": 33.74,
226
- "eval_steps_per_second": 33.74,
227
- "step": 265
228
- },
229
- {
230
- "epoch": 4.0754716981132075,
231
- "grad_norm": 0.0005262196063995361,
232
- "learning_rate": 0.00019696969696969695,
233
  "loss": 0.0,
234
  "step": 270
235
  },
236
  {
237
- "epoch": 4.226415094339623,
238
- "grad_norm": 0.000520399131346494,
239
- "learning_rate": 0.0001919191919191919,
240
  "loss": 0.0,
241
  "step": 280
242
  },
243
  {
244
- "epoch": 4.377358490566038,
245
- "grad_norm": 0.0005163260502740741,
246
- "learning_rate": 0.00018686868686868687,
 
 
 
 
 
 
 
 
 
247
  "loss": 0.0,
248
  "step": 290
249
  },
250
  {
251
- "epoch": 4.528301886792453,
252
- "grad_norm": 0.0005078270332887769,
253
- "learning_rate": 0.0001818181818181818,
254
  "loss": 0.0,
255
  "step": 300
256
  },
257
  {
258
- "epoch": 4.679245283018868,
259
- "grad_norm": 0.0005045266589149833,
260
- "learning_rate": 0.00017676767676767674,
261
  "loss": 0.0,
262
  "step": 310
263
  },
264
  {
265
- "epoch": 4.830188679245283,
266
- "grad_norm": 0.0005001064273528755,
267
- "learning_rate": 0.00017171717171717167,
268
  "loss": 0.0,
269
  "step": 320
270
  },
271
  {
272
- "epoch": 4.981132075471698,
273
- "grad_norm": 0.0004999340162612498,
274
- "learning_rate": 0.00016666666666666666,
275
  "loss": 0.0,
276
  "step": 330
277
  },
278
  {
279
- "epoch": 4.996226415094339,
280
- "eval_accuracy": 1.0,
281
- "eval_loss": 1.0251998901367188e-05,
282
- "eval_runtime": 7.0645,
283
- "eval_samples_per_second": 33.831,
284
- "eval_steps_per_second": 33.831,
285
- "step": 331
286
- },
287
- {
288
- "epoch": 5.132075471698113,
289
- "grad_norm": 0.0004946214030496776,
290
- "learning_rate": 0.0001616161616161616,
291
  "loss": 0.0,
292
  "step": 340
293
  },
294
  {
295
- "epoch": 5.283018867924528,
296
- "grad_norm": 0.0004907374386675656,
297
- "learning_rate": 0.00015656565656565653,
298
  "loss": 0.0,
299
  "step": 350
300
  },
301
  {
302
- "epoch": 5.433962264150943,
303
- "grad_norm": 0.00048724733642302454,
304
- "learning_rate": 0.00015151515151515152,
305
  "loss": 0.0,
306
  "step": 360
307
  },
308
  {
309
- "epoch": 5.584905660377358,
310
- "grad_norm": 0.00048625547788105905,
311
- "learning_rate": 0.00014646464646464645,
312
  "loss": 0.0,
313
  "step": 370
314
  },
315
  {
316
- "epoch": 5.735849056603773,
317
- "grad_norm": 0.0004835619474761188,
318
- "learning_rate": 0.0001414141414141414,
 
 
 
 
 
 
 
 
 
319
  "loss": 0.0,
320
  "step": 380
321
  },
322
  {
323
- "epoch": 5.886792452830189,
324
- "grad_norm": 0.00048138212878257036,
325
- "learning_rate": 0.00013636363636363634,
326
  "loss": 0.0,
327
  "step": 390
328
  },
329
  {
330
- "epoch": 5.992452830188679,
331
- "eval_accuracy": 1.0,
332
- "eval_loss": 9.655952453613281e-06,
333
- "eval_runtime": 7.2087,
334
- "eval_samples_per_second": 33.154,
335
- "eval_steps_per_second": 33.154,
336
- "step": 397
337
- },
338
- {
339
- "epoch": 6.037735849056604,
340
- "grad_norm": 0.00047443213406950235,
341
- "learning_rate": 0.0001313131313131313,
342
  "loss": 0.0,
343
  "step": 400
344
  },
345
  {
346
- "epoch": 6.188679245283019,
347
- "grad_norm": 0.00047414255095645785,
348
- "learning_rate": 0.00012626262626262626,
349
  "loss": 0.0,
350
  "step": 410
351
  },
352
  {
353
- "epoch": 6.339622641509434,
354
- "grad_norm": 0.0004742112651001662,
355
- "learning_rate": 0.0001212121212121212,
356
  "loss": 0.0,
357
  "step": 420
358
  },
359
  {
360
- "epoch": 6.490566037735849,
361
- "grad_norm": 0.0004664694133680314,
362
- "learning_rate": 0.00011616161616161616,
363
  "loss": 0.0,
364
  "step": 430
365
  },
366
  {
367
- "epoch": 6.6415094339622645,
368
- "grad_norm": 0.00046457306598313153,
369
- "learning_rate": 0.00011111111111111109,
370
  "loss": 0.0,
371
  "step": 440
372
  },
373
  {
374
- "epoch": 6.7924528301886795,
375
- "grad_norm": 0.0004610670148395002,
376
- "learning_rate": 0.00010606060606060605,
377
  "loss": 0.0,
378
  "step": 450
379
  },
380
  {
381
- "epoch": 6.943396226415095,
382
- "grad_norm": 0.0004591474134940654,
383
- "learning_rate": 0.00010101010101010101,
384
  "loss": 0.0,
385
  "step": 460
386
  },
387
  {
388
- "epoch": 6.988679245283019,
389
- "eval_accuracy": 1.0,
390
- "eval_loss": 9.298324584960938e-06,
391
- "eval_runtime": 7.0591,
392
- "eval_samples_per_second": 33.857,
393
- "eval_steps_per_second": 33.857,
394
- "step": 463
395
  },
396
  {
397
- "epoch": 7.09433962264151,
398
- "grad_norm": 0.00045856498763896525,
399
- "learning_rate": 9.595959595959594e-05,
400
- "loss": 0.0,
 
 
401
  "step": 470
402
  },
403
  {
404
- "epoch": 7.245283018867925,
405
- "grad_norm": 0.00045752941514365375,
406
- "learning_rate": 9.09090909090909e-05,
407
  "loss": 0.0,
408
  "step": 480
409
  },
410
  {
411
- "epoch": 7.39622641509434,
412
- "grad_norm": 0.000454788125352934,
413
- "learning_rate": 8.585858585858584e-05,
414
  "loss": 0.0,
415
  "step": 490
416
  },
417
  {
418
- "epoch": 7.547169811320755,
419
- "grad_norm": 0.00044687636545859277,
420
- "learning_rate": 8.08080808080808e-05,
421
  "loss": 0.0,
422
  "step": 500
423
  },
424
  {
425
- "epoch": 7.69811320754717,
426
- "grad_norm": 0.00044597158557735384,
427
- "learning_rate": 7.575757575757576e-05,
428
  "loss": 0.0,
429
  "step": 510
430
  },
431
  {
432
- "epoch": 7.849056603773585,
433
- "grad_norm": 0.00044422256178222597,
434
- "learning_rate": 7.07070707070707e-05,
435
  "loss": 0.0,
436
  "step": 520
437
  },
438
  {
439
- "epoch": 8.0,
440
- "grad_norm": 0.0004414164577610791,
441
- "learning_rate": 6.565656565656565e-05,
442
  "loss": 0.0,
443
  "step": 530
444
  },
445
  {
446
- "epoch": 8.0,
447
- "eval_accuracy": 1.0,
448
- "eval_loss": 9.059906005859375e-06,
449
- "eval_runtime": 7.273,
450
- "eval_samples_per_second": 32.861,
451
- "eval_steps_per_second": 32.861,
452
- "step": 530
453
- },
454
- {
455
- "epoch": 8.150943396226415,
456
- "grad_norm": 0.00044037611223757267,
457
- "learning_rate": 6.06060606060606e-05,
458
  "loss": 0.0,
459
  "step": 540
460
  },
461
  {
462
- "epoch": 8.30188679245283,
463
- "grad_norm": 0.00044043423258699477,
464
- "learning_rate": 5.5555555555555545e-05,
465
  "loss": 0.0,
466
  "step": 550
467
  },
468
  {
469
- "epoch": 8.452830188679245,
470
- "grad_norm": 0.00043952648411504924,
471
- "learning_rate": 5.0505050505050505e-05,
472
  "loss": 0.0,
473
  "step": 560
474
  },
475
  {
476
- "epoch": 8.60377358490566,
477
- "grad_norm": 0.00043861393351107836,
478
- "learning_rate": 4.545454545454545e-05,
 
 
 
 
 
 
 
 
 
479
  "loss": 0.0,
480
  "step": 570
481
  },
482
  {
483
- "epoch": 8.754716981132075,
484
- "grad_norm": 0.000436424306826666,
485
- "learning_rate": 4.04040404040404e-05,
486
  "loss": 0.0,
487
  "step": 580
488
  },
489
  {
490
- "epoch": 8.90566037735849,
491
- "grad_norm": 0.00043321994598954916,
492
- "learning_rate": 3.535353535353535e-05,
493
  "loss": 0.0,
494
  "step": 590
495
  },
496
  {
497
- "epoch": 8.99622641509434,
498
- "eval_accuracy": 1.0,
499
- "eval_loss": 8.940696716308594e-06,
500
- "eval_runtime": 7.0835,
501
- "eval_samples_per_second": 33.74,
502
- "eval_steps_per_second": 33.74,
503
- "step": 596
504
- },
505
- {
506
- "epoch": 9.056603773584905,
507
- "grad_norm": 0.0004325976187828928,
508
- "learning_rate": 3.03030303030303e-05,
509
  "loss": 0.0,
510
  "step": 600
511
  },
512
  {
513
- "epoch": 9.20754716981132,
514
- "grad_norm": 0.0004321152810007334,
515
- "learning_rate": 2.5252525252525253e-05,
516
  "loss": 0.0,
517
  "step": 610
518
  },
519
  {
520
- "epoch": 9.358490566037736,
521
- "grad_norm": 0.00043118096073158085,
522
- "learning_rate": 2.02020202020202e-05,
523
  "loss": 0.0,
524
  "step": 620
525
  },
526
  {
527
- "epoch": 9.50943396226415,
528
- "grad_norm": 0.00043104574433527887,
529
- "learning_rate": 1.515151515151515e-05,
530
  "loss": 0.0,
531
  "step": 630
532
  },
533
  {
534
- "epoch": 9.660377358490566,
535
- "grad_norm": 0.00043100357288494706,
536
- "learning_rate": 1.01010101010101e-05,
537
  "loss": 0.0,
538
  "step": 640
539
  },
540
  {
541
- "epoch": 9.81132075471698,
542
- "grad_norm": 0.00043099047616124153,
543
- "learning_rate": 5.05050505050505e-06,
544
  "loss": 0.0,
545
  "step": 650
546
  },
547
  {
548
- "epoch": 9.962264150943396,
549
- "grad_norm": 0.00043099172762595117,
550
- "learning_rate": 0.0,
 
 
 
 
 
 
 
 
 
551
  "loss": 0.0,
552
  "step": 660
553
  },
554
  {
555
- "epoch": 9.962264150943396,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
556
  "eval_accuracy": 1.0,
557
- "eval_loss": 8.821487426757812e-06,
558
- "eval_runtime": 7.157,
559
- "eval_samples_per_second": 33.394,
560
- "eval_steps_per_second": 33.394,
561
- "step": 660
 
 
 
 
 
 
 
562
  },
563
  {
564
- "epoch": 9.962264150943396,
565
- "step": 660,
566
- "total_flos": 2.937323868434565e+18,
567
- "train_loss": 0.1621207386876146,
568
- "train_runtime": 757.3573,
569
- "train_samples_per_second": 27.992,
570
- "train_steps_per_second": 0.871
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
  }
572
  ],
573
  "logging_steps": 10,
574
- "max_steps": 660,
575
  "num_input_tokens_seen": 0,
576
  "num_train_epochs": 10,
577
  "save_steps": 500,
@@ -587,7 +783,7 @@
587
  "attributes": {}
588
  }
589
  },
590
- "total_flos": 2.937323868434565e+18,
591
  "train_batch_size": 8,
592
  "trial_name": null,
593
  "trial_params": null
 
1
  {
2
  "best_metric": 1.0,
3
+ "best_model_checkpoint": "wav2vec2-base-lang-id/checkpoint-94",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 940,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.10638297872340426,
13
+ "grad_norm": 3.8006672859191895,
14
+ "learning_rate": 3.1914893617021275e-05,
15
+ "loss": 4.4798,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.2127659574468085,
20
+ "grad_norm": 6.771675109863281,
21
+ "learning_rate": 6.382978723404255e-05,
22
+ "loss": 3.4754,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.3191489361702128,
27
+ "grad_norm": 8.21432113647461,
28
+ "learning_rate": 9.574468085106382e-05,
29
+ "loss": 2.4467,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.425531914893617,
34
+ "grad_norm": 7.680928707122803,
35
+ "learning_rate": 0.0001276595744680851,
36
+ "loss": 1.4232,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.5319148936170213,
41
+ "grad_norm": 2.195053815841675,
42
+ "learning_rate": 0.00015957446808510637,
43
+ "loss": 0.4151,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.6382978723404256,
48
+ "grad_norm": 0.13116297125816345,
49
+ "learning_rate": 0.00019148936170212765,
50
+ "loss": 0.0361,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 0.7446808510638298,
55
+ "grad_norm": 0.014657862484455109,
56
+ "learning_rate": 0.0002234042553191489,
57
+ "loss": 0.0017,
 
 
 
 
 
 
 
 
 
58
  "step": 70
59
  },
60
  {
61
+ "epoch": 0.851063829787234,
62
+ "grad_norm": 0.004712587222456932,
63
+ "learning_rate": 0.0002553191489361702,
64
+ "loss": 0.0003,
65
  "step": 80
66
  },
67
  {
68
+ "epoch": 0.9574468085106383,
69
+ "grad_norm": 0.0027682166546583176,
70
+ "learning_rate": 0.0002872340425531915,
71
+ "loss": 0.0001,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 1.0,
76
+ "eval_accuracy": 1.0,
77
+ "eval_loss": 5.837103526573628e-05,
78
+ "eval_runtime": 11.542,
79
+ "eval_samples_per_second": 34.829,
80
+ "eval_steps_per_second": 34.829,
81
+ "step": 94
82
+ },
83
+ {
84
+ "epoch": 1.0638297872340425,
85
+ "grad_norm": 0.0021660495549440384,
86
+ "learning_rate": 0.00029787234042553186,
87
+ "loss": 0.0001,
88
  "step": 100
89
  },
90
  {
91
+ "epoch": 1.1702127659574468,
92
+ "grad_norm": 0.001946191769093275,
93
+ "learning_rate": 0.00029432624113475173,
94
+ "loss": 0.0001,
95
  "step": 110
96
  },
97
  {
98
+ "epoch": 1.2765957446808511,
99
+ "grad_norm": 0.001837807591073215,
100
+ "learning_rate": 0.0002907801418439716,
101
+ "loss": 0.0001,
102
  "step": 120
103
  },
104
  {
105
+ "epoch": 1.3829787234042552,
106
+ "grad_norm": 0.0017718354938551784,
107
+ "learning_rate": 0.0002872340425531915,
108
+ "loss": 0.0001,
109
  "step": 130
110
  },
111
  {
112
+ "epoch": 1.4893617021276595,
113
+ "grad_norm": 0.0017246523639187217,
114
+ "learning_rate": 0.00028368794326241134,
115
+ "loss": 0.0001,
 
 
 
 
 
 
 
 
 
116
  "step": 140
117
  },
118
  {
119
+ "epoch": 1.5957446808510638,
120
+ "grad_norm": 0.0016802914906293154,
121
+ "learning_rate": 0.00028014184397163116,
122
+ "loss": 0.0001,
123
  "step": 150
124
  },
125
  {
126
+ "epoch": 1.702127659574468,
127
+ "grad_norm": 0.0016365655465051532,
128
+ "learning_rate": 0.00027659574468085103,
129
+ "loss": 0.0001,
130
  "step": 160
131
  },
132
  {
133
+ "epoch": 1.8085106382978724,
134
+ "grad_norm": 0.001596157904714346,
135
+ "learning_rate": 0.0002730496453900709,
136
+ "loss": 0.0001,
137
  "step": 170
138
  },
139
  {
140
+ "epoch": 1.9148936170212765,
141
+ "grad_norm": 0.0015629915287718177,
142
+ "learning_rate": 0.00026950354609929077,
143
+ "loss": 0.0001,
144
  "step": 180
145
  },
146
  {
147
+ "epoch": 2.0,
148
+ "eval_accuracy": 1.0,
149
+ "eval_loss": 3.635883331298828e-05,
150
+ "eval_runtime": 10.0577,
151
+ "eval_samples_per_second": 39.969,
152
+ "eval_steps_per_second": 39.969,
153
+ "step": 188
154
  },
155
  {
156
+ "epoch": 2.021276595744681,
157
+ "grad_norm": 0.001525467843748629,
158
+ "learning_rate": 0.0002659574468085106,
159
+ "loss": 0.0001,
160
+ "step": 190
 
 
161
  },
162
  {
163
+ "epoch": 2.127659574468085,
164
+ "grad_norm": 0.0014883955009281635,
165
+ "learning_rate": 0.00026241134751773046,
166
+ "loss": 0.0001,
167
  "step": 200
168
  },
169
  {
170
+ "epoch": 2.2340425531914896,
171
+ "grad_norm": 0.0014570483472198248,
172
+ "learning_rate": 0.00025886524822695033,
173
  "loss": 0.0,
174
  "step": 210
175
  },
176
  {
177
+ "epoch": 2.3404255319148937,
178
+ "grad_norm": 0.0014193649403750896,
179
+ "learning_rate": 0.0002553191489361702,
180
  "loss": 0.0,
181
  "step": 220
182
  },
183
  {
184
+ "epoch": 2.4468085106382977,
185
+ "grad_norm": 0.0013909874251112342,
186
+ "learning_rate": 0.00025177304964539007,
187
  "loss": 0.0,
188
  "step": 230
189
  },
190
  {
191
+ "epoch": 2.5531914893617023,
192
+ "grad_norm": 0.0013578328071162105,
193
+ "learning_rate": 0.0002482269503546099,
194
  "loss": 0.0,
195
  "step": 240
196
  },
197
  {
198
+ "epoch": 2.6595744680851063,
199
+ "grad_norm": 0.0013242242857813835,
200
+ "learning_rate": 0.00024468085106382976,
201
  "loss": 0.0,
202
  "step": 250
203
  },
204
  {
205
+ "epoch": 2.7659574468085104,
206
+ "grad_norm": 0.0013041673228144646,
207
+ "learning_rate": 0.00024113475177304963,
208
  "loss": 0.0,
209
  "step": 260
210
  },
211
  {
212
+ "epoch": 2.872340425531915,
213
+ "grad_norm": 0.0012726597487926483,
214
+ "learning_rate": 0.0002375886524822695,
 
 
 
 
 
 
 
 
 
215
  "loss": 0.0,
216
  "step": 270
217
  },
218
  {
219
+ "epoch": 2.978723404255319,
220
+ "grad_norm": 0.0012477930868044496,
221
+ "learning_rate": 0.00023404255319148934,
222
  "loss": 0.0,
223
  "step": 280
224
  },
225
  {
226
+ "epoch": 3.0,
227
+ "eval_accuracy": 1.0,
228
+ "eval_loss": 2.9017082852078602e-05,
229
+ "eval_runtime": 9.9982,
230
+ "eval_samples_per_second": 40.207,
231
+ "eval_steps_per_second": 40.207,
232
+ "step": 282
233
+ },
234
+ {
235
+ "epoch": 3.0851063829787235,
236
+ "grad_norm": 0.0012191747082397342,
237
+ "learning_rate": 0.0002304964539007092,
238
  "loss": 0.0,
239
  "step": 290
240
  },
241
  {
242
+ "epoch": 3.1914893617021276,
243
+ "grad_norm": 0.0011935862712562084,
244
+ "learning_rate": 0.00022695035460992905,
245
  "loss": 0.0,
246
  "step": 300
247
  },
248
  {
249
+ "epoch": 3.297872340425532,
250
+ "grad_norm": 0.0011742267524823546,
251
+ "learning_rate": 0.0002234042553191489,
252
  "loss": 0.0,
253
  "step": 310
254
  },
255
  {
256
+ "epoch": 3.404255319148936,
257
+ "grad_norm": 0.0011481853434816003,
258
+ "learning_rate": 0.00021985815602836877,
259
  "loss": 0.0,
260
  "step": 320
261
  },
262
  {
263
+ "epoch": 3.5106382978723403,
264
+ "grad_norm": 0.0011253234697505832,
265
+ "learning_rate": 0.00021631205673758864,
266
  "loss": 0.0,
267
  "step": 330
268
  },
269
  {
270
+ "epoch": 3.617021276595745,
271
+ "grad_norm": 0.0011064092395827174,
272
+ "learning_rate": 0.0002127659574468085,
 
 
 
 
 
 
 
 
 
273
  "loss": 0.0,
274
  "step": 340
275
  },
276
  {
277
+ "epoch": 3.723404255319149,
278
+ "grad_norm": 0.0010885618394240737,
279
+ "learning_rate": 0.00020921985815602835,
280
  "loss": 0.0,
281
  "step": 350
282
  },
283
  {
284
+ "epoch": 3.829787234042553,
285
+ "grad_norm": 0.001064595184288919,
286
+ "learning_rate": 0.0002056737588652482,
287
  "loss": 0.0,
288
  "step": 360
289
  },
290
  {
291
+ "epoch": 3.9361702127659575,
292
+ "grad_norm": 0.0010447927052155137,
293
+ "learning_rate": 0.00020212765957446807,
294
  "loss": 0.0,
295
  "step": 370
296
  },
297
  {
298
+ "epoch": 4.0,
299
+ "eval_accuracy": 1.0,
300
+ "eval_loss": 2.372264862060547e-05,
301
+ "eval_runtime": 9.9011,
302
+ "eval_samples_per_second": 40.602,
303
+ "eval_steps_per_second": 40.602,
304
+ "step": 376
305
+ },
306
+ {
307
+ "epoch": 4.042553191489362,
308
+ "grad_norm": 0.001030342886224389,
309
+ "learning_rate": 0.0001985815602836879,
310
  "loss": 0.0,
311
  "step": 380
312
  },
313
  {
314
+ "epoch": 4.148936170212766,
315
+ "grad_norm": 0.0010120035149157047,
316
+ "learning_rate": 0.0001950354609929078,
317
  "loss": 0.0,
318
  "step": 390
319
  },
320
  {
321
+ "epoch": 4.25531914893617,
322
+ "grad_norm": 0.0009937717113643885,
323
+ "learning_rate": 0.00019148936170212765,
 
 
 
 
 
 
 
 
 
324
  "loss": 0.0,
325
  "step": 400
326
  },
327
  {
328
+ "epoch": 4.361702127659575,
329
+ "grad_norm": 0.0009758667438291013,
330
+ "learning_rate": 0.0001879432624113475,
331
  "loss": 0.0,
332
  "step": 410
333
  },
334
  {
335
+ "epoch": 4.468085106382979,
336
+ "grad_norm": 0.0009558630990795791,
337
+ "learning_rate": 0.00018439716312056736,
338
  "loss": 0.0,
339
  "step": 420
340
  },
341
  {
342
+ "epoch": 4.574468085106383,
343
+ "grad_norm": 0.0009456143015995622,
344
+ "learning_rate": 0.0001808510638297872,
345
  "loss": 0.0,
346
  "step": 430
347
  },
348
  {
349
+ "epoch": 4.680851063829787,
350
+ "grad_norm": 0.0009262987296096981,
351
+ "learning_rate": 0.00017730496453900708,
352
  "loss": 0.0,
353
  "step": 440
354
  },
355
  {
356
+ "epoch": 4.787234042553192,
357
+ "grad_norm": 0.0009140170877799392,
358
+ "learning_rate": 0.00017375886524822692,
359
  "loss": 0.0,
360
  "step": 450
361
  },
362
  {
363
+ "epoch": 4.8936170212765955,
364
+ "grad_norm": 0.000904095999430865,
365
+ "learning_rate": 0.00017021276595744682,
366
  "loss": 0.0,
367
  "step": 460
368
  },
369
  {
370
+ "epoch": 5.0,
371
+ "grad_norm": 0.0008834420586936176,
372
+ "learning_rate": 0.00016666666666666666,
373
+ "loss": 0.0,
374
+ "step": 470
 
 
375
  },
376
  {
377
+ "epoch": 5.0,
378
+ "eval_accuracy": 1.0,
379
+ "eval_loss": 2.014636993408203e-05,
380
+ "eval_runtime": 9.9558,
381
+ "eval_samples_per_second": 40.379,
382
+ "eval_steps_per_second": 40.379,
383
  "step": 470
384
  },
385
  {
386
+ "epoch": 5.1063829787234045,
387
+ "grad_norm": 0.0008752320427447557,
388
+ "learning_rate": 0.0001631205673758865,
389
  "loss": 0.0,
390
  "step": 480
391
  },
392
  {
393
+ "epoch": 5.212765957446808,
394
+ "grad_norm": 0.0008663799380883574,
395
+ "learning_rate": 0.00015957446808510637,
396
  "loss": 0.0,
397
  "step": 490
398
  },
399
  {
400
+ "epoch": 5.319148936170213,
401
+ "grad_norm": 0.0008535313536413014,
402
+ "learning_rate": 0.00015602836879432622,
403
  "loss": 0.0,
404
  "step": 500
405
  },
406
  {
407
+ "epoch": 5.425531914893617,
408
+ "grad_norm": 0.0008452454931102693,
409
+ "learning_rate": 0.00015248226950354606,
410
  "loss": 0.0,
411
  "step": 510
412
  },
413
  {
414
+ "epoch": 5.531914893617021,
415
+ "grad_norm": 0.0008268958772532642,
416
+ "learning_rate": 0.00014893617021276593,
417
  "loss": 0.0,
418
  "step": 520
419
  },
420
  {
421
+ "epoch": 5.638297872340425,
422
+ "grad_norm": 0.0008181555895134807,
423
+ "learning_rate": 0.0001453900709219858,
424
  "loss": 0.0,
425
  "step": 530
426
  },
427
  {
428
+ "epoch": 5.74468085106383,
429
+ "grad_norm": 0.0008063354762271047,
430
+ "learning_rate": 0.00014184397163120567,
 
 
 
 
 
 
 
 
 
431
  "loss": 0.0,
432
  "step": 540
433
  },
434
  {
435
+ "epoch": 5.851063829787234,
436
+ "grad_norm": 0.0007958101341500878,
437
+ "learning_rate": 0.00013829787234042552,
438
  "loss": 0.0,
439
  "step": 550
440
  },
441
  {
442
+ "epoch": 5.957446808510638,
443
+ "grad_norm": 0.0007865344523452222,
444
+ "learning_rate": 0.00013475177304964539,
445
  "loss": 0.0,
446
  "step": 560
447
  },
448
  {
449
+ "epoch": 6.0,
450
+ "eval_accuracy": 1.0,
451
+ "eval_loss": 1.7642974853515625e-05,
452
+ "eval_runtime": 9.9723,
453
+ "eval_samples_per_second": 40.312,
454
+ "eval_steps_per_second": 40.312,
455
+ "step": 564
456
+ },
457
+ {
458
+ "epoch": 6.0638297872340425,
459
+ "grad_norm": 0.0007770307711325586,
460
+ "learning_rate": 0.00013120567375886523,
461
  "loss": 0.0,
462
  "step": 570
463
  },
464
  {
465
+ "epoch": 6.170212765957447,
466
+ "grad_norm": 0.0007697618566453457,
467
+ "learning_rate": 0.0001276595744680851,
468
  "loss": 0.0,
469
  "step": 580
470
  },
471
  {
472
+ "epoch": 6.276595744680851,
473
+ "grad_norm": 0.0007645227597095072,
474
+ "learning_rate": 0.00012411347517730494,
475
  "loss": 0.0,
476
  "step": 590
477
  },
478
  {
479
+ "epoch": 6.382978723404255,
480
+ "grad_norm": 0.0007574139162898064,
481
+ "learning_rate": 0.00012056737588652481,
 
 
 
 
 
 
 
 
 
482
  "loss": 0.0,
483
  "step": 600
484
  },
485
  {
486
+ "epoch": 6.48936170212766,
487
+ "grad_norm": 0.000741077761631459,
488
+ "learning_rate": 0.00011702127659574467,
489
  "loss": 0.0,
490
  "step": 610
491
  },
492
  {
493
+ "epoch": 6.595744680851064,
494
+ "grad_norm": 0.000734071247279644,
495
+ "learning_rate": 0.00011347517730496453,
496
  "loss": 0.0,
497
  "step": 620
498
  },
499
  {
500
+ "epoch": 6.702127659574468,
501
+ "grad_norm": 0.000728779355995357,
502
+ "learning_rate": 0.00010992907801418438,
503
  "loss": 0.0,
504
  "step": 630
505
  },
506
  {
507
+ "epoch": 6.808510638297872,
508
+ "grad_norm": 0.0007212815107777715,
509
+ "learning_rate": 0.00010638297872340425,
510
  "loss": 0.0,
511
  "step": 640
512
  },
513
  {
514
+ "epoch": 6.914893617021277,
515
+ "grad_norm": 0.0007169453892856836,
516
+ "learning_rate": 0.0001028368794326241,
517
  "loss": 0.0,
518
  "step": 650
519
  },
520
  {
521
+ "epoch": 7.0,
522
+ "eval_accuracy": 1.0,
523
+ "eval_loss": 1.596455513208639e-05,
524
+ "eval_runtime": 9.9126,
525
+ "eval_samples_per_second": 40.554,
526
+ "eval_steps_per_second": 40.554,
527
+ "step": 658
528
+ },
529
+ {
530
+ "epoch": 7.0212765957446805,
531
+ "grad_norm": 0.000711097614839673,
532
+ "learning_rate": 9.929078014184395e-05,
533
  "loss": 0.0,
534
  "step": 660
535
  },
536
  {
537
+ "epoch": 7.127659574468085,
538
+ "grad_norm": 0.0007094301981851459,
539
+ "learning_rate": 9.574468085106382e-05,
540
+ "loss": 0.0,
541
+ "step": 670
542
+ },
543
+ {
544
+ "epoch": 7.23404255319149,
545
+ "grad_norm": 0.0006968958768993616,
546
+ "learning_rate": 9.219858156028368e-05,
547
+ "loss": 0.0,
548
+ "step": 680
549
+ },
550
+ {
551
+ "epoch": 7.340425531914893,
552
+ "grad_norm": 0.0006909930380061269,
553
+ "learning_rate": 8.865248226950354e-05,
554
+ "loss": 0.0,
555
+ "step": 690
556
+ },
557
+ {
558
+ "epoch": 7.446808510638298,
559
+ "grad_norm": 0.0006865290924906731,
560
+ "learning_rate": 8.510638297872341e-05,
561
+ "loss": 0.0,
562
+ "step": 700
563
+ },
564
+ {
565
+ "epoch": 7.553191489361702,
566
+ "grad_norm": 0.0006844609742984176,
567
+ "learning_rate": 8.156028368794325e-05,
568
+ "loss": 0.0,
569
+ "step": 710
570
+ },
571
+ {
572
+ "epoch": 7.659574468085106,
573
+ "grad_norm": 0.0006792008061893284,
574
+ "learning_rate": 7.801418439716311e-05,
575
+ "loss": 0.0,
576
+ "step": 720
577
+ },
578
+ {
579
+ "epoch": 7.76595744680851,
580
+ "grad_norm": 0.0006731408648192883,
581
+ "learning_rate": 7.446808510638297e-05,
582
+ "loss": 0.0,
583
+ "step": 730
584
+ },
585
+ {
586
+ "epoch": 7.872340425531915,
587
+ "grad_norm": 0.0006701324600726366,
588
+ "learning_rate": 7.092198581560284e-05,
589
+ "loss": 0.0,
590
+ "step": 740
591
+ },
592
+ {
593
+ "epoch": 7.9787234042553195,
594
+ "grad_norm": 0.0006633326993323863,
595
+ "learning_rate": 6.737588652482269e-05,
596
+ "loss": 0.0,
597
+ "step": 750
598
+ },
599
+ {
600
+ "epoch": 8.0,
601
  "eval_accuracy": 1.0,
602
+ "eval_loss": 1.4901161193847656e-05,
603
+ "eval_runtime": 9.9494,
604
+ "eval_samples_per_second": 40.405,
605
+ "eval_steps_per_second": 40.405,
606
+ "step": 752
607
+ },
608
+ {
609
+ "epoch": 8.085106382978724,
610
+ "grad_norm": 0.0006632324075326324,
611
+ "learning_rate": 6.382978723404255e-05,
612
+ "loss": 0.0,
613
+ "step": 760
614
  },
615
  {
616
+ "epoch": 8.191489361702128,
617
+ "grad_norm": 0.000655403477139771,
618
+ "learning_rate": 6.028368794326241e-05,
619
+ "loss": 0.0,
620
+ "step": 770
621
+ },
622
+ {
623
+ "epoch": 8.297872340425531,
624
+ "grad_norm": 0.0006541645270772278,
625
+ "learning_rate": 5.6737588652482264e-05,
626
+ "loss": 0.0,
627
+ "step": 780
628
+ },
629
+ {
630
+ "epoch": 8.404255319148936,
631
+ "grad_norm": 0.0006483749020844698,
632
+ "learning_rate": 5.319148936170213e-05,
633
+ "loss": 0.0,
634
+ "step": 790
635
+ },
636
+ {
637
+ "epoch": 8.51063829787234,
638
+ "grad_norm": 0.0006471078377217054,
639
+ "learning_rate": 4.964539007092198e-05,
640
+ "loss": 0.0,
641
+ "step": 800
642
+ },
643
+ {
644
+ "epoch": 8.617021276595745,
645
+ "grad_norm": 0.0006466888007707894,
646
+ "learning_rate": 4.609929078014184e-05,
647
+ "loss": 0.0,
648
+ "step": 810
649
+ },
650
+ {
651
+ "epoch": 8.72340425531915,
652
+ "grad_norm": 0.0006407785695046186,
653
+ "learning_rate": 4.2553191489361704e-05,
654
+ "loss": 0.0,
655
+ "step": 820
656
+ },
657
+ {
658
+ "epoch": 8.829787234042554,
659
+ "grad_norm": 0.0006384547450579703,
660
+ "learning_rate": 3.9007092198581555e-05,
661
+ "loss": 0.0,
662
+ "step": 830
663
+ },
664
+ {
665
+ "epoch": 8.936170212765958,
666
+ "grad_norm": 0.0006379844271577895,
667
+ "learning_rate": 3.546099290780142e-05,
668
+ "loss": 0.0,
669
+ "step": 840
670
+ },
671
+ {
672
+ "epoch": 9.0,
673
+ "eval_accuracy": 1.0,
674
+ "eval_loss": 1.4424324035644531e-05,
675
+ "eval_runtime": 9.9315,
676
+ "eval_samples_per_second": 40.477,
677
+ "eval_steps_per_second": 40.477,
678
+ "step": 846
679
+ },
680
+ {
681
+ "epoch": 9.042553191489361,
682
+ "grad_norm": 0.0006334384088404477,
683
+ "learning_rate": 3.1914893617021275e-05,
684
+ "loss": 0.0,
685
+ "step": 850
686
+ },
687
+ {
688
+ "epoch": 9.148936170212766,
689
+ "grad_norm": 0.0006309397285804152,
690
+ "learning_rate": 2.8368794326241132e-05,
691
+ "loss": 0.0,
692
+ "step": 860
693
+ },
694
+ {
695
+ "epoch": 9.25531914893617,
696
+ "grad_norm": 0.0006310406024567783,
697
+ "learning_rate": 2.482269503546099e-05,
698
+ "loss": 0.0,
699
+ "step": 870
700
+ },
701
+ {
702
+ "epoch": 9.361702127659575,
703
+ "grad_norm": 0.0006308447918854654,
704
+ "learning_rate": 2.1276595744680852e-05,
705
+ "loss": 0.0,
706
+ "step": 880
707
+ },
708
+ {
709
+ "epoch": 9.46808510638298,
710
+ "grad_norm": 0.0006286040297709405,
711
+ "learning_rate": 1.773049645390071e-05,
712
+ "loss": 0.0,
713
+ "step": 890
714
+ },
715
+ {
716
+ "epoch": 9.574468085106384,
717
+ "grad_norm": 0.0006258686189539731,
718
+ "learning_rate": 1.4184397163120566e-05,
719
+ "loss": 0.0,
720
+ "step": 900
721
+ },
722
+ {
723
+ "epoch": 9.680851063829786,
724
+ "grad_norm": 0.0006251951563172042,
725
+ "learning_rate": 1.0638297872340426e-05,
726
+ "loss": 0.0,
727
+ "step": 910
728
+ },
729
+ {
730
+ "epoch": 9.787234042553191,
731
+ "grad_norm": 0.0006240535294637084,
732
+ "learning_rate": 7.092198581560283e-06,
733
+ "loss": 0.0,
734
+ "step": 920
735
+ },
736
+ {
737
+ "epoch": 9.893617021276595,
738
+ "grad_norm": 0.0006240674993023276,
739
+ "learning_rate": 3.5460992907801415e-06,
740
+ "loss": 0.0,
741
+ "step": 930
742
+ },
743
+ {
744
+ "epoch": 10.0,
745
+ "grad_norm": 0.0006238200003281236,
746
+ "learning_rate": 0.0,
747
+ "loss": 0.0,
748
+ "step": 940
749
+ },
750
+ {
751
+ "epoch": 10.0,
752
+ "eval_accuracy": 1.0,
753
+ "eval_loss": 1.4065806681173854e-05,
754
+ "eval_runtime": 9.9903,
755
+ "eval_samples_per_second": 40.239,
756
+ "eval_steps_per_second": 40.239,
757
+ "step": 940
758
+ },
759
+ {
760
+ "epoch": 10.0,
761
+ "step": 940,
762
+ "total_flos": 4.315241031363276e+18,
763
+ "train_loss": 0.13065080859353445,
764
+ "train_runtime": 1086.1024,
765
+ "train_samples_per_second": 27.677,
766
+ "train_steps_per_second": 0.865
767
  }
768
  ],
769
  "logging_steps": 10,
770
+ "max_steps": 940,
771
  "num_input_tokens_seen": 0,
772
  "num_train_epochs": 10,
773
  "save_steps": 500,
 
783
  "attributes": {}
784
  }
785
  },
786
+ "total_flos": 4.315241031363276e+18,
787
  "train_batch_size": 8,
788
  "trial_name": null,
789
  "trial_params": null