Dhyey8 commited on
Commit
fcd0373
1 Parent(s): 0b15979

End of training

Browse files
README.md CHANGED
@@ -32,7 +32,7 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.0631
36
  - Accuracy: 0.8391
37
 
38
  ## Model description
 
32
 
33
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 1.1564
36
  - Accuracy: 0.8391
37
 
38
  ## Model description
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 40.0,
3
- "eval_accuracy": 0.8673913043478261,
4
- "eval_loss": 0.6579979062080383,
5
- "eval_runtime": 4.0597,
6
- "eval_samples_per_second": 113.309,
7
- "eval_steps_per_second": 3.695,
8
  "total_flos": 4.585003492737024e+17,
9
- "train_loss": 0.305610773563385,
10
- "train_runtime": 583.5167,
11
- "train_samples_per_second": 39.416,
12
- "train_steps_per_second": 0.257
13
  }
 
1
  {
2
  "epoch": 40.0,
3
+ "eval_accuracy": 0.8391304347826087,
4
+ "eval_loss": 1.1564103364944458,
5
+ "eval_runtime": 4.5109,
6
+ "eval_samples_per_second": 101.976,
7
+ "eval_steps_per_second": 3.325,
8
  "total_flos": 4.585003492737024e+17,
9
+ "train_loss": 2.1741886361440024,
10
+ "train_runtime": 1273.2626,
11
+ "train_samples_per_second": 18.064,
12
+ "train_steps_per_second": 0.118
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 40.0,
3
- "eval_accuracy": 0.8673913043478261,
4
- "eval_loss": 0.6579979062080383,
5
- "eval_runtime": 4.0597,
6
- "eval_samples_per_second": 113.309,
7
- "eval_steps_per_second": 3.695
8
  }
 
1
  {
2
  "epoch": 40.0,
3
+ "eval_accuracy": 0.8391304347826087,
4
+ "eval_loss": 1.1564103364944458,
5
+ "eval_runtime": 4.5109,
6
+ "eval_samples_per_second": 101.976,
7
+ "eval_steps_per_second": 3.325
8
  }
runs/Apr04_09-11-23_fe56890e08d0/events.out.tfevents.1712223190.fe56890e08d0.1029.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af311342097efaa8fdc346ed9a898d6e8099390e3983c5607b32d1afa9a6a5c
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 40.0,
3
  "total_flos": 4.585003492737024e+17,
4
- "train_loss": 0.305610773563385,
5
- "train_runtime": 583.5167,
6
- "train_samples_per_second": 39.416,
7
- "train_steps_per_second": 0.257
8
  }
 
1
  {
2
  "epoch": 40.0,
3
  "total_flos": 4.585003492737024e+17,
4
+ "train_loss": 2.1741886361440024,
5
+ "train_runtime": 1273.2626,
6
+ "train_samples_per_second": 18.064,
7
+ "train_steps_per_second": 0.118
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8673913043478261,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-teeth_dataset/checkpoint-67",
4
  "epoch": 40.0,
5
  "eval_steps": 500,
6
  "global_step": 150,
@@ -10,477 +10,477 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.8,
13
- "eval_accuracy": 0.7913043478260869,
14
- "eval_loss": 1.1515443325042725,
15
- "eval_runtime": 4.209,
16
- "eval_samples_per_second": 109.29,
17
- "eval_steps_per_second": 3.564,
18
  "step": 3
19
  },
20
  {
21
  "epoch": 1.87,
22
- "eval_accuracy": 0.7630434782608696,
23
- "eval_loss": 1.1545782089233398,
24
- "eval_runtime": 4.4373,
25
- "eval_samples_per_second": 103.665,
26
- "eval_steps_per_second": 3.38,
27
  "step": 7
28
  },
29
  {
30
  "epoch": 2.67,
31
- "grad_norm": 12.2432222366333,
32
  "learning_rate": 3.3333333333333335e-05,
33
- "loss": 0.6441,
34
  "step": 10
35
  },
36
  {
37
  "epoch": 2.93,
38
- "eval_accuracy": 0.8065217391304348,
39
- "eval_loss": 1.070844292640686,
40
- "eval_runtime": 4.2607,
41
- "eval_samples_per_second": 107.964,
42
- "eval_steps_per_second": 3.521,
43
  "step": 11
44
  },
45
  {
46
  "epoch": 4.0,
47
- "eval_accuracy": 0.8217391304347826,
48
- "eval_loss": 0.992546796798706,
49
- "eval_runtime": 4.1431,
50
- "eval_samples_per_second": 111.027,
51
- "eval_steps_per_second": 3.62,
52
  "step": 15
53
  },
54
  {
55
  "epoch": 4.8,
56
- "eval_accuracy": 0.8108695652173913,
57
- "eval_loss": 0.99262535572052,
58
- "eval_runtime": 3.9996,
59
- "eval_samples_per_second": 115.012,
60
- "eval_steps_per_second": 3.75,
61
  "step": 18
62
  },
63
  {
64
  "epoch": 5.33,
65
- "grad_norm": 12.744916915893555,
66
  "learning_rate": 4.814814814814815e-05,
67
- "loss": 0.6042,
68
  "step": 20
69
  },
70
  {
71
  "epoch": 5.87,
72
- "eval_accuracy": 0.7934782608695652,
73
- "eval_loss": 1.016530990600586,
74
- "eval_runtime": 4.0517,
75
- "eval_samples_per_second": 113.532,
76
- "eval_steps_per_second": 3.702,
77
  "step": 22
78
  },
79
  {
80
  "epoch": 6.93,
81
- "eval_accuracy": 0.8347826086956521,
82
- "eval_loss": 0.8679854273796082,
83
- "eval_runtime": 3.9943,
84
- "eval_samples_per_second": 115.163,
85
- "eval_steps_per_second": 3.755,
86
  "step": 26
87
  },
88
  {
89
  "epoch": 8.0,
90
- "grad_norm": 23.645719528198242,
91
  "learning_rate": 4.4444444444444447e-05,
92
- "loss": 0.4293,
93
  "step": 30
94
  },
95
  {
96
  "epoch": 8.0,
97
- "eval_accuracy": 0.8347826086956521,
98
- "eval_loss": 0.8360960483551025,
99
- "eval_runtime": 4.0619,
100
- "eval_samples_per_second": 113.248,
101
- "eval_steps_per_second": 3.693,
102
  "step": 30
103
  },
104
  {
105
  "epoch": 8.8,
106
- "eval_accuracy": 0.8565217391304348,
107
- "eval_loss": 0.8075701594352722,
108
- "eval_runtime": 4.0291,
109
- "eval_samples_per_second": 114.17,
110
- "eval_steps_per_second": 3.723,
111
  "step": 33
112
  },
113
  {
114
  "epoch": 9.87,
115
- "eval_accuracy": 0.8565217391304348,
116
- "eval_loss": 0.7547765970230103,
117
- "eval_runtime": 4.0783,
118
- "eval_samples_per_second": 112.793,
119
- "eval_steps_per_second": 3.678,
120
  "step": 37
121
  },
122
  {
123
  "epoch": 10.67,
124
- "grad_norm": 20.158720016479492,
125
  "learning_rate": 4.074074074074074e-05,
126
- "loss": 0.4471,
127
  "step": 40
128
  },
129
  {
130
  "epoch": 10.93,
131
- "eval_accuracy": 0.8369565217391305,
132
- "eval_loss": 0.7901058793067932,
133
- "eval_runtime": 4.2465,
134
- "eval_samples_per_second": 108.325,
135
- "eval_steps_per_second": 3.532,
136
  "step": 41
137
  },
138
  {
139
  "epoch": 12.0,
140
- "eval_accuracy": 0.8543478260869565,
141
- "eval_loss": 0.7326875925064087,
142
- "eval_runtime": 4.3562,
143
- "eval_samples_per_second": 105.597,
144
- "eval_steps_per_second": 3.443,
145
  "step": 45
146
  },
147
  {
148
  "epoch": 12.8,
149
- "eval_accuracy": 0.8478260869565217,
150
- "eval_loss": 0.748528003692627,
151
- "eval_runtime": 4.402,
152
- "eval_samples_per_second": 104.498,
153
- "eval_steps_per_second": 3.408,
154
  "step": 48
155
  },
156
  {
157
  "epoch": 13.33,
158
- "grad_norm": 11.568655967712402,
159
  "learning_rate": 3.7037037037037037e-05,
160
- "loss": 0.3436,
161
  "step": 50
162
  },
163
  {
164
  "epoch": 13.87,
165
- "eval_accuracy": 0.8347826086956521,
166
- "eval_loss": 0.7844414710998535,
167
- "eval_runtime": 4.2227,
168
- "eval_samples_per_second": 108.934,
169
- "eval_steps_per_second": 3.552,
170
  "step": 52
171
  },
172
  {
173
  "epoch": 14.93,
174
- "eval_accuracy": 0.8630434782608696,
175
- "eval_loss": 0.6644619107246399,
176
- "eval_runtime": 4.1242,
177
- "eval_samples_per_second": 111.537,
178
- "eval_steps_per_second": 3.637,
179
  "step": 56
180
  },
181
  {
182
  "epoch": 16.0,
183
- "grad_norm": 14.64358139038086,
184
  "learning_rate": 3.3333333333333335e-05,
185
- "loss": 0.2813,
186
  "step": 60
187
  },
188
  {
189
  "epoch": 16.0,
190
- "eval_accuracy": 0.8391304347826087,
191
- "eval_loss": 0.7370373010635376,
192
- "eval_runtime": 4.0228,
193
- "eval_samples_per_second": 114.348,
194
- "eval_steps_per_second": 3.729,
195
  "step": 60
196
  },
197
  {
198
  "epoch": 16.8,
199
- "eval_accuracy": 0.8347826086956521,
200
- "eval_loss": 0.7598132491111755,
201
- "eval_runtime": 3.9928,
202
- "eval_samples_per_second": 115.208,
203
- "eval_steps_per_second": 3.757,
204
  "step": 63
205
  },
206
  {
207
  "epoch": 17.87,
208
- "eval_accuracy": 0.8673913043478261,
209
- "eval_loss": 0.6579979062080383,
210
- "eval_runtime": 4.1194,
211
- "eval_samples_per_second": 111.667,
212
- "eval_steps_per_second": 3.641,
213
  "step": 67
214
  },
215
  {
216
  "epoch": 18.67,
217
- "grad_norm": 16.545446395874023,
218
  "learning_rate": 2.962962962962963e-05,
219
- "loss": 0.275,
220
  "step": 70
221
  },
222
  {
223
  "epoch": 18.93,
224
- "eval_accuracy": 0.8456521739130435,
225
- "eval_loss": 0.7026421427726746,
226
- "eval_runtime": 4.039,
227
- "eval_samples_per_second": 113.889,
228
- "eval_steps_per_second": 3.714,
229
  "step": 71
230
  },
231
  {
232
  "epoch": 20.0,
233
- "eval_accuracy": 0.8369565217391305,
234
- "eval_loss": 0.7055774927139282,
235
- "eval_runtime": 4.0414,
236
- "eval_samples_per_second": 113.823,
237
- "eval_steps_per_second": 3.712,
238
  "step": 75
239
  },
240
  {
241
  "epoch": 20.8,
242
- "eval_accuracy": 0.8543478260869565,
243
- "eval_loss": 0.6517285108566284,
244
- "eval_runtime": 4.1333,
245
- "eval_samples_per_second": 111.292,
246
- "eval_steps_per_second": 3.629,
247
  "step": 78
248
  },
249
  {
250
  "epoch": 21.33,
251
- "grad_norm": 13.078596115112305,
252
  "learning_rate": 2.5925925925925925e-05,
253
- "loss": 0.2674,
254
  "step": 80
255
  },
256
  {
257
  "epoch": 21.87,
258
- "eval_accuracy": 0.8608695652173913,
259
- "eval_loss": 0.5930755734443665,
260
- "eval_runtime": 4.2994,
261
- "eval_samples_per_second": 106.991,
262
- "eval_steps_per_second": 3.489,
263
  "step": 82
264
  },
265
  {
266
  "epoch": 22.93,
267
- "eval_accuracy": 0.8608695652173913,
268
- "eval_loss": 0.5985938310623169,
269
- "eval_runtime": 4.3353,
270
- "eval_samples_per_second": 106.105,
271
- "eval_steps_per_second": 3.46,
272
  "step": 86
273
  },
274
  {
275
  "epoch": 24.0,
276
- "grad_norm": 8.3045015335083,
277
  "learning_rate": 2.2222222222222223e-05,
278
- "loss": 0.2278,
279
  "step": 90
280
  },
281
  {
282
  "epoch": 24.0,
283
- "eval_accuracy": 0.8608695652173913,
284
- "eval_loss": 0.5982087254524231,
285
- "eval_runtime": 4.3231,
286
- "eval_samples_per_second": 106.404,
287
- "eval_steps_per_second": 3.47,
288
  "step": 90
289
  },
290
  {
291
  "epoch": 24.8,
292
- "eval_accuracy": 0.8543478260869565,
293
- "eval_loss": 0.604324460029602,
294
- "eval_runtime": 4.0948,
295
- "eval_samples_per_second": 112.337,
296
- "eval_steps_per_second": 3.663,
297
  "step": 93
298
  },
299
  {
300
  "epoch": 25.87,
301
- "eval_accuracy": 0.8521739130434782,
302
- "eval_loss": 0.6148393750190735,
303
- "eval_runtime": 4.0061,
304
- "eval_samples_per_second": 114.824,
305
- "eval_steps_per_second": 3.744,
306
  "step": 97
307
  },
308
  {
309
  "epoch": 26.67,
310
- "grad_norm": 15.322602272033691,
311
  "learning_rate": 1.8518518518518518e-05,
312
- "loss": 0.2034,
313
  "step": 100
314
  },
315
  {
316
  "epoch": 26.93,
317
- "eval_accuracy": 0.85,
318
- "eval_loss": 0.6160370111465454,
319
- "eval_runtime": 4.0736,
320
- "eval_samples_per_second": 112.922,
321
- "eval_steps_per_second": 3.682,
322
  "step": 101
323
  },
324
  {
325
  "epoch": 28.0,
326
- "eval_accuracy": 0.8521739130434782,
327
- "eval_loss": 0.5903608798980713,
328
- "eval_runtime": 4.1333,
329
- "eval_samples_per_second": 111.291,
330
- "eval_steps_per_second": 3.629,
331
  "step": 105
332
  },
333
  {
334
  "epoch": 28.8,
335
- "eval_accuracy": 0.8608695652173913,
336
- "eval_loss": 0.5824848413467407,
337
- "eval_runtime": 4.2951,
338
- "eval_samples_per_second": 107.1,
339
- "eval_steps_per_second": 3.492,
340
  "step": 108
341
  },
342
  {
343
  "epoch": 29.33,
344
- "grad_norm": 12.949268341064453,
345
  "learning_rate": 1.4814814814814815e-05,
346
- "loss": 0.2207,
347
  "step": 110
348
  },
349
  {
350
  "epoch": 29.87,
351
- "eval_accuracy": 0.8478260869565217,
352
- "eval_loss": 0.6132237911224365,
353
- "eval_runtime": 4.382,
354
- "eval_samples_per_second": 104.975,
355
- "eval_steps_per_second": 3.423,
356
  "step": 112
357
  },
358
  {
359
  "epoch": 30.93,
360
- "eval_accuracy": 0.8391304347826087,
361
- "eval_loss": 0.6181351542472839,
362
- "eval_runtime": 4.27,
363
- "eval_samples_per_second": 107.728,
364
- "eval_steps_per_second": 3.513,
365
  "step": 116
366
  },
367
  {
368
  "epoch": 32.0,
369
- "grad_norm": 5.803860664367676,
370
  "learning_rate": 1.1111111111111112e-05,
371
- "loss": 0.1701,
372
  "step": 120
373
  },
374
  {
375
  "epoch": 32.0,
376
- "eval_accuracy": 0.85,
377
- "eval_loss": 0.5688785910606384,
378
- "eval_runtime": 4.1338,
379
- "eval_samples_per_second": 111.279,
380
- "eval_steps_per_second": 3.629,
381
  "step": 120
382
  },
383
  {
384
  "epoch": 32.8,
385
- "eval_accuracy": 0.8608695652173913,
386
- "eval_loss": 0.5633876919746399,
387
- "eval_runtime": 4.7704,
388
- "eval_samples_per_second": 96.428,
389
- "eval_steps_per_second": 3.144,
390
  "step": 123
391
  },
392
  {
393
  "epoch": 33.87,
394
- "eval_accuracy": 0.8652173913043478,
395
- "eval_loss": 0.5659446120262146,
396
- "eval_runtime": 4.0029,
397
- "eval_samples_per_second": 114.915,
398
- "eval_steps_per_second": 3.747,
399
  "step": 127
400
  },
401
  {
402
  "epoch": 34.67,
403
- "grad_norm": 18.19119644165039,
404
  "learning_rate": 7.4074074074074075e-06,
405
- "loss": 0.1681,
406
  "step": 130
407
  },
408
  {
409
  "epoch": 34.93,
410
- "eval_accuracy": 0.8565217391304348,
411
- "eval_loss": 0.5895132422447205,
412
- "eval_runtime": 4.0203,
413
- "eval_samples_per_second": 114.42,
414
- "eval_steps_per_second": 3.731,
415
  "step": 131
416
  },
417
  {
418
  "epoch": 36.0,
419
- "eval_accuracy": 0.85,
420
- "eval_loss": 0.6094810366630554,
421
- "eval_runtime": 4.1594,
422
- "eval_samples_per_second": 110.594,
423
- "eval_steps_per_second": 3.606,
424
  "step": 135
425
  },
426
  {
427
  "epoch": 36.8,
428
- "eval_accuracy": 0.8456521739130435,
429
- "eval_loss": 0.614500105381012,
430
- "eval_runtime": 4.3427,
431
- "eval_samples_per_second": 105.924,
432
- "eval_steps_per_second": 3.454,
433
  "step": 138
434
  },
435
  {
436
  "epoch": 37.33,
437
- "grad_norm": 7.945148468017578,
438
  "learning_rate": 3.7037037037037037e-06,
439
- "loss": 0.1435,
440
  "step": 140
441
  },
442
  {
443
  "epoch": 37.87,
444
- "eval_accuracy": 0.8478260869565217,
445
- "eval_loss": 0.6111825704574585,
446
- "eval_runtime": 4.3974,
447
- "eval_samples_per_second": 104.606,
448
- "eval_steps_per_second": 3.411,
449
  "step": 142
450
  },
451
  {
452
  "epoch": 38.93,
453
- "eval_accuracy": 0.8456521739130435,
454
- "eval_loss": 0.599669873714447,
455
- "eval_runtime": 4.2726,
456
- "eval_samples_per_second": 107.662,
457
- "eval_steps_per_second": 3.511,
458
  "step": 146
459
  },
460
  {
461
  "epoch": 40.0,
462
- "grad_norm": 11.281535148620605,
463
  "learning_rate": 0.0,
464
- "loss": 0.1586,
465
  "step": 150
466
  },
467
  {
468
  "epoch": 40.0,
469
- "eval_accuracy": 0.85,
470
- "eval_loss": 0.5965761542320251,
471
- "eval_runtime": 4.0634,
472
- "eval_samples_per_second": 113.205,
473
- "eval_steps_per_second": 3.691,
474
  "step": 150
475
  },
476
  {
477
  "epoch": 40.0,
478
  "step": 150,
479
  "total_flos": 4.585003492737024e+17,
480
- "train_loss": 0.305610773563385,
481
- "train_runtime": 583.5167,
482
- "train_samples_per_second": 39.416,
483
- "train_steps_per_second": 0.257
484
  }
485
  ],
486
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.8391304347826087,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-teeth_dataset/checkpoint-120",
4
  "epoch": 40.0,
5
  "eval_steps": 500,
6
  "global_step": 150,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.8,
13
+ "eval_accuracy": 0.015217391304347827,
14
+ "eval_loss": 4.579592704772949,
15
+ "eval_runtime": 349.3053,
16
+ "eval_samples_per_second": 1.317,
17
+ "eval_steps_per_second": 0.043,
18
  "step": 3
19
  },
20
  {
21
  "epoch": 1.87,
22
+ "eval_accuracy": 0.02608695652173913,
23
+ "eval_loss": 4.519979000091553,
24
+ "eval_runtime": 4.4643,
25
+ "eval_samples_per_second": 103.04,
26
+ "eval_steps_per_second": 3.36,
27
  "step": 7
28
  },
29
  {
30
  "epoch": 2.67,
31
+ "grad_norm": 4.626802921295166,
32
  "learning_rate": 3.3333333333333335e-05,
33
+ "loss": 4.5616,
34
  "step": 10
35
  },
36
  {
37
  "epoch": 2.93,
38
+ "eval_accuracy": 0.03260869565217391,
39
+ "eval_loss": 4.470459461212158,
40
+ "eval_runtime": 4.8338,
41
+ "eval_samples_per_second": 95.164,
42
+ "eval_steps_per_second": 3.103,
43
  "step": 11
44
  },
45
  {
46
  "epoch": 4.0,
47
+ "eval_accuracy": 0.06739130434782609,
48
+ "eval_loss": 4.412718296051025,
49
+ "eval_runtime": 5.0133,
50
+ "eval_samples_per_second": 91.756,
51
+ "eval_steps_per_second": 2.992,
52
  "step": 15
53
  },
54
  {
55
  "epoch": 4.8,
56
+ "eval_accuracy": 0.08043478260869565,
57
+ "eval_loss": 4.349262714385986,
58
+ "eval_runtime": 3.9868,
59
+ "eval_samples_per_second": 115.382,
60
+ "eval_steps_per_second": 3.762,
61
  "step": 18
62
  },
63
  {
64
  "epoch": 5.33,
65
+ "grad_norm": 4.411898136138916,
66
  "learning_rate": 4.814814814814815e-05,
67
+ "loss": 4.44,
68
  "step": 20
69
  },
70
  {
71
  "epoch": 5.87,
72
+ "eval_accuracy": 0.11304347826086956,
73
+ "eval_loss": 4.242533206939697,
74
+ "eval_runtime": 4.0721,
75
+ "eval_samples_per_second": 112.963,
76
+ "eval_steps_per_second": 3.684,
77
  "step": 22
78
  },
79
  {
80
  "epoch": 6.93,
81
+ "eval_accuracy": 0.13695652173913042,
82
+ "eval_loss": 4.110694885253906,
83
+ "eval_runtime": 4.2677,
84
+ "eval_samples_per_second": 107.788,
85
+ "eval_steps_per_second": 3.515,
86
  "step": 26
87
  },
88
  {
89
  "epoch": 8.0,
90
+ "grad_norm": 10.778473854064941,
91
  "learning_rate": 4.4444444444444447e-05,
92
+ "loss": 4.1823,
93
  "step": 30
94
  },
95
  {
96
  "epoch": 8.0,
97
+ "eval_accuracy": 0.1608695652173913,
98
+ "eval_loss": 3.9340145587921143,
99
+ "eval_runtime": 4.4318,
100
+ "eval_samples_per_second": 103.796,
101
+ "eval_steps_per_second": 3.385,
102
  "step": 30
103
  },
104
  {
105
  "epoch": 8.8,
106
+ "eval_accuracy": 0.1934782608695652,
107
+ "eval_loss": 3.7821249961853027,
108
+ "eval_runtime": 4.4828,
109
+ "eval_samples_per_second": 102.615,
110
+ "eval_steps_per_second": 3.346,
111
  "step": 33
112
  },
113
  {
114
  "epoch": 9.87,
115
+ "eval_accuracy": 0.2782608695652174,
116
+ "eval_loss": 3.5313777923583984,
117
+ "eval_runtime": 4.1068,
118
+ "eval_samples_per_second": 112.01,
119
+ "eval_steps_per_second": 3.653,
120
  "step": 37
121
  },
122
  {
123
  "epoch": 10.67,
124
+ "grad_norm": 14.638664245605469,
125
  "learning_rate": 4.074074074074074e-05,
126
+ "loss": 3.6357,
127
  "step": 40
128
  },
129
  {
130
  "epoch": 10.93,
131
+ "eval_accuracy": 0.30434782608695654,
132
+ "eval_loss": 3.285728693008423,
133
+ "eval_runtime": 4.0688,
134
+ "eval_samples_per_second": 113.055,
135
+ "eval_steps_per_second": 3.687,
136
  "step": 41
137
  },
138
  {
139
  "epoch": 12.0,
140
+ "eval_accuracy": 0.3695652173913043,
141
+ "eval_loss": 3.106356143951416,
142
+ "eval_runtime": 4.0947,
143
+ "eval_samples_per_second": 112.339,
144
+ "eval_steps_per_second": 3.663,
145
  "step": 45
146
  },
147
  {
148
  "epoch": 12.8,
149
+ "eval_accuracy": 0.3826086956521739,
150
+ "eval_loss": 2.9712679386138916,
151
+ "eval_runtime": 4.065,
152
+ "eval_samples_per_second": 113.162,
153
+ "eval_steps_per_second": 3.69,
154
  "step": 48
155
  },
156
  {
157
  "epoch": 13.33,
158
+ "grad_norm": 24.2481746673584,
159
  "learning_rate": 3.7037037037037037e-05,
160
+ "loss": 3.0041,
161
  "step": 50
162
  },
163
  {
164
  "epoch": 13.87,
165
+ "eval_accuracy": 0.48695652173913045,
166
+ "eval_loss": 2.7171707153320312,
167
+ "eval_runtime": 4.147,
168
+ "eval_samples_per_second": 110.923,
169
+ "eval_steps_per_second": 3.617,
170
  "step": 52
171
  },
172
  {
173
  "epoch": 14.93,
174
+ "eval_accuracy": 0.5434782608695652,
175
+ "eval_loss": 2.511073589324951,
176
+ "eval_runtime": 4.329,
177
+ "eval_samples_per_second": 106.261,
178
+ "eval_steps_per_second": 3.465,
179
  "step": 56
180
  },
181
  {
182
  "epoch": 16.0,
183
+ "grad_norm": 17.775148391723633,
184
  "learning_rate": 3.3333333333333335e-05,
185
+ "loss": 2.4604,
186
  "step": 60
187
  },
188
  {
189
  "epoch": 16.0,
190
+ "eval_accuracy": 0.5695652173913044,
191
+ "eval_loss": 2.356055736541748,
192
+ "eval_runtime": 4.5447,
193
+ "eval_samples_per_second": 101.216,
194
+ "eval_steps_per_second": 3.301,
195
  "step": 60
196
  },
197
  {
198
  "epoch": 16.8,
199
+ "eval_accuracy": 0.5717391304347826,
200
+ "eval_loss": 2.2684459686279297,
201
+ "eval_runtime": 4.3589,
202
+ "eval_samples_per_second": 105.531,
203
+ "eval_steps_per_second": 3.441,
204
  "step": 63
205
  },
206
  {
207
  "epoch": 17.87,
208
+ "eval_accuracy": 0.6347826086956522,
209
+ "eval_loss": 2.0961129665374756,
210
+ "eval_runtime": 4.0684,
211
+ "eval_samples_per_second": 113.066,
212
+ "eval_steps_per_second": 3.687,
213
  "step": 67
214
  },
215
  {
216
  "epoch": 18.67,
217
+ "grad_norm": 28.70151710510254,
218
  "learning_rate": 2.962962962962963e-05,
219
+ "loss": 1.971,
220
  "step": 70
221
  },
222
  {
223
  "epoch": 18.93,
224
+ "eval_accuracy": 0.6782608695652174,
225
+ "eval_loss": 1.9555323123931885,
226
+ "eval_runtime": 4.1018,
227
+ "eval_samples_per_second": 112.147,
228
+ "eval_steps_per_second": 3.657,
229
  "step": 71
230
  },
231
  {
232
  "epoch": 20.0,
233
+ "eval_accuracy": 0.6891304347826087,
234
+ "eval_loss": 1.8400135040283203,
235
+ "eval_runtime": 4.114,
236
+ "eval_samples_per_second": 111.814,
237
+ "eval_steps_per_second": 3.646,
238
  "step": 75
239
  },
240
  {
241
  "epoch": 20.8,
242
+ "eval_accuracy": 0.7239130434782609,
243
+ "eval_loss": 1.78555166721344,
244
+ "eval_runtime": 4.1163,
245
+ "eval_samples_per_second": 111.752,
246
+ "eval_steps_per_second": 3.644,
247
  "step": 78
248
  },
249
  {
250
  "epoch": 21.33,
251
+ "grad_norm": 25.09538459777832,
252
  "learning_rate": 2.5925925925925925e-05,
253
+ "loss": 1.651,
254
  "step": 80
255
  },
256
  {
257
  "epoch": 21.87,
258
+ "eval_accuracy": 0.7369565217391304,
259
+ "eval_loss": 1.6797059774398804,
260
+ "eval_runtime": 4.243,
261
+ "eval_samples_per_second": 108.413,
262
+ "eval_steps_per_second": 3.535,
263
  "step": 82
264
  },
265
  {
266
  "epoch": 22.93,
267
+ "eval_accuracy": 0.7717391304347826,
268
+ "eval_loss": 1.600671410560608,
269
+ "eval_runtime": 4.4052,
270
+ "eval_samples_per_second": 104.422,
271
+ "eval_steps_per_second": 3.405,
272
  "step": 86
273
  },
274
  {
275
  "epoch": 24.0,
276
+ "grad_norm": 30.617403030395508,
277
  "learning_rate": 2.2222222222222223e-05,
278
+ "loss": 1.3665,
279
  "step": 90
280
  },
281
  {
282
  "epoch": 24.0,
283
+ "eval_accuracy": 0.7739130434782608,
284
+ "eval_loss": 1.5255870819091797,
285
+ "eval_runtime": 4.5557,
286
+ "eval_samples_per_second": 100.972,
287
+ "eval_steps_per_second": 3.293,
288
  "step": 90
289
  },
290
  {
291
  "epoch": 24.8,
292
+ "eval_accuracy": 0.7652173913043478,
293
+ "eval_loss": 1.4875919818878174,
294
+ "eval_runtime": 4.9234,
295
+ "eval_samples_per_second": 93.432,
296
+ "eval_steps_per_second": 3.047,
297
  "step": 93
298
  },
299
  {
300
  "epoch": 25.87,
301
+ "eval_accuracy": 0.7782608695652173,
302
+ "eval_loss": 1.4394937753677368,
303
+ "eval_runtime": 4.1294,
304
+ "eval_samples_per_second": 111.395,
305
+ "eval_steps_per_second": 3.632,
306
  "step": 97
307
  },
308
  {
309
  "epoch": 26.67,
310
+ "grad_norm": 17.45021629333496,
311
  "learning_rate": 1.8518518518518518e-05,
312
+ "loss": 1.1954,
313
  "step": 100
314
  },
315
  {
316
  "epoch": 26.93,
317
+ "eval_accuracy": 0.7869565217391304,
318
+ "eval_loss": 1.3679003715515137,
319
+ "eval_runtime": 4.1015,
320
+ "eval_samples_per_second": 112.155,
321
+ "eval_steps_per_second": 3.657,
322
  "step": 101
323
  },
324
  {
325
  "epoch": 28.0,
326
+ "eval_accuracy": 0.8021739130434783,
327
+ "eval_loss": 1.304268479347229,
328
+ "eval_runtime": 4.1469,
329
+ "eval_samples_per_second": 110.928,
330
+ "eval_steps_per_second": 3.617,
331
  "step": 105
332
  },
333
  {
334
  "epoch": 28.8,
335
+ "eval_accuracy": 0.8021739130434783,
336
+ "eval_loss": 1.2906242609024048,
337
+ "eval_runtime": 4.1542,
338
+ "eval_samples_per_second": 110.731,
339
+ "eval_steps_per_second": 3.611,
340
  "step": 108
341
  },
342
  {
343
  "epoch": 29.33,
344
+ "grad_norm": 16.89618492126465,
345
  "learning_rate": 1.4814814814814815e-05,
346
+ "loss": 0.9886,
347
  "step": 110
348
  },
349
  {
350
  "epoch": 29.87,
351
+ "eval_accuracy": 0.8108695652173913,
352
+ "eval_loss": 1.2312711477279663,
353
+ "eval_runtime": 4.2901,
354
+ "eval_samples_per_second": 107.224,
355
+ "eval_steps_per_second": 3.496,
356
  "step": 112
357
  },
358
  {
359
  "epoch": 30.93,
360
+ "eval_accuracy": 0.8347826086956521,
361
+ "eval_loss": 1.182868480682373,
362
+ "eval_runtime": 4.4407,
363
+ "eval_samples_per_second": 103.588,
364
+ "eval_steps_per_second": 3.378,
365
  "step": 116
366
  },
367
  {
368
  "epoch": 32.0,
369
+ "grad_norm": 14.743515968322754,
370
  "learning_rate": 1.1111111111111112e-05,
371
+ "loss": 0.8803,
372
  "step": 120
373
  },
374
  {
375
  "epoch": 32.0,
376
+ "eval_accuracy": 0.8391304347826087,
377
+ "eval_loss": 1.1564103364944458,
378
+ "eval_runtime": 4.4277,
379
+ "eval_samples_per_second": 103.892,
380
+ "eval_steps_per_second": 3.388,
381
  "step": 120
382
  },
383
  {
384
  "epoch": 32.8,
385
+ "eval_accuracy": 0.8304347826086956,
386
+ "eval_loss": 1.1421394348144531,
387
+ "eval_runtime": 4.2096,
388
+ "eval_samples_per_second": 109.275,
389
+ "eval_steps_per_second": 3.563,
390
  "step": 123
391
  },
392
  {
393
  "epoch": 33.87,
394
+ "eval_accuracy": 0.8326086956521739,
395
+ "eval_loss": 1.114410400390625,
396
+ "eval_runtime": 4.0812,
397
+ "eval_samples_per_second": 112.713,
398
+ "eval_steps_per_second": 3.675,
399
  "step": 127
400
  },
401
  {
402
  "epoch": 34.67,
403
+ "grad_norm": 15.186381340026855,
404
  "learning_rate": 7.4074074074074075e-06,
405
+ "loss": 0.815,
406
  "step": 130
407
  },
408
  {
409
  "epoch": 34.93,
410
+ "eval_accuracy": 0.8304347826086956,
411
+ "eval_loss": 1.107426404953003,
412
+ "eval_runtime": 4.1243,
413
+ "eval_samples_per_second": 111.534,
414
+ "eval_steps_per_second": 3.637,
415
  "step": 131
416
  },
417
  {
418
  "epoch": 36.0,
419
+ "eval_accuracy": 0.8282608695652174,
420
+ "eval_loss": 1.0919322967529297,
421
+ "eval_runtime": 4.1615,
422
+ "eval_samples_per_second": 110.537,
423
+ "eval_steps_per_second": 3.604,
424
  "step": 135
425
  },
426
  {
427
  "epoch": 36.8,
428
+ "eval_accuracy": 0.8326086956521739,
429
+ "eval_loss": 1.082062840461731,
430
+ "eval_runtime": 4.184,
431
+ "eval_samples_per_second": 109.942,
432
+ "eval_steps_per_second": 3.585,
433
  "step": 138
434
  },
435
  {
436
  "epoch": 37.33,
437
+ "grad_norm": 12.77287483215332,
438
  "learning_rate": 3.7037037037037037e-06,
439
+ "loss": 0.7619,
440
  "step": 140
441
  },
442
  {
443
  "epoch": 37.87,
444
+ "eval_accuracy": 0.8347826086956521,
445
+ "eval_loss": 1.070085883140564,
446
+ "eval_runtime": 4.3491,
447
+ "eval_samples_per_second": 105.769,
448
+ "eval_steps_per_second": 3.449,
449
  "step": 142
450
  },
451
  {
452
  "epoch": 38.93,
453
+ "eval_accuracy": 0.8347826086956521,
454
+ "eval_loss": 1.0642296075820923,
455
+ "eval_runtime": 4.4959,
456
+ "eval_samples_per_second": 102.315,
457
+ "eval_steps_per_second": 3.336,
458
  "step": 146
459
  },
460
  {
461
  "epoch": 40.0,
462
+ "grad_norm": 20.172346115112305,
463
  "learning_rate": 0.0,
464
+ "loss": 0.6991,
465
  "step": 150
466
  },
467
  {
468
  "epoch": 40.0,
469
+ "eval_accuracy": 0.8391304347826087,
470
+ "eval_loss": 1.063112497329712,
471
+ "eval_runtime": 4.3636,
472
+ "eval_samples_per_second": 105.417,
473
+ "eval_steps_per_second": 3.438,
474
  "step": 150
475
  },
476
  {
477
  "epoch": 40.0,
478
  "step": 150,
479
  "total_flos": 4.585003492737024e+17,
480
+ "train_loss": 2.1741886361440024,
481
+ "train_runtime": 1273.2626,
482
+ "train_samples_per_second": 18.064,
483
+ "train_steps_per_second": 0.118
484
  }
485
  ],
486
  "logging_steps": 10,