rukundob451 commited on
Commit
eba306f
1 Parent(s): bac6afa

End of training

Browse files
README.md CHANGED
@@ -32,7 +32,7 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [facebook/deit-tiny-patch16-224](https://huggingface.co/facebook/deit-tiny-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.3409
36
  - Accuracy: 0.9118
37
 
38
  ## Model description
 
32
 
33
  This model is a fine-tuned version of [facebook/deit-tiny-patch16-224](https://huggingface.co/facebook/deit-tiny-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.3421
36
  - Accuracy: 0.9118
37
 
38
  ## Model description
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 14.805194805194805,
3
- "eval_accuracy": 0.8529411764705882,
4
- "eval_loss": 0.4389486610889435,
5
- "eval_runtime": 28.0984,
6
- "eval_samples_per_second": 4.84,
7
- "eval_steps_per_second": 0.32,
8
  "total_flos": 9.04681758989353e+16,
9
- "train_loss": 0.6850269221422965,
10
- "train_runtime": 4202.2024,
11
- "train_samples_per_second": 4.369,
12
- "train_steps_per_second": 0.068
13
  }
 
1
  {
2
  "epoch": 14.805194805194805,
3
+ "eval_accuracy": 0.9117647058823529,
4
+ "eval_loss": 0.34208229184150696,
5
+ "eval_runtime": 28.5471,
6
+ "eval_samples_per_second": 4.764,
7
+ "eval_steps_per_second": 0.315,
8
  "total_flos": 9.04681758989353e+16,
9
+ "train_loss": 0.2430994153022766,
10
+ "train_runtime": 4010.606,
11
+ "train_samples_per_second": 4.578,
12
+ "train_steps_per_second": 0.071
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 14.805194805194805,
3
- "eval_accuracy": 0.8529411764705882,
4
- "eval_loss": 0.4389486610889435,
5
- "eval_runtime": 28.0984,
6
- "eval_samples_per_second": 4.84,
7
- "eval_steps_per_second": 0.32
8
  }
 
1
  {
2
  "epoch": 14.805194805194805,
3
+ "eval_accuracy": 0.9117647058823529,
4
+ "eval_loss": 0.34208229184150696,
5
+ "eval_runtime": 28.5471,
6
+ "eval_samples_per_second": 4.764,
7
+ "eval_steps_per_second": 0.315
8
  }
runs/Sep18_05-34-03_0a55b5166c22/events.out.tfevents.1726646257.0a55b5166c22.36.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77503f0fca795d6bb2c8c46ca193331e9cf704748c14caa88f8697dc64e57bb8
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 14.805194805194805,
3
  "total_flos": 9.04681758989353e+16,
4
- "train_loss": 0.6850269221422965,
5
- "train_runtime": 4202.2024,
6
- "train_samples_per_second": 4.369,
7
- "train_steps_per_second": 0.068
8
  }
 
1
  {
2
  "epoch": 14.805194805194805,
3
  "total_flos": 9.04681758989353e+16,
4
+ "train_loss": 0.2430994153022766,
5
+ "train_runtime": 4010.606,
6
+ "train_samples_per_second": 4.578,
7
+ "train_steps_per_second": 0.071
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8529411764705882,
3
  "best_model_checkpoint": "deit-tiny-patch16-224-finetuned-papsmear/checkpoint-269",
4
  "epoch": 14.805194805194805,
5
  "eval_steps": 500,
@@ -10,343 +10,343 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.5194805194805194,
13
- "grad_norm": 5.887332439422607,
14
  "learning_rate": 1.7241379310344828e-05,
15
- "loss": 1.8247,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.987012987012987,
20
- "eval_accuracy": 0.3014705882352941,
21
- "eval_loss": 1.6199148893356323,
22
- "eval_runtime": 27.9963,
23
- "eval_samples_per_second": 4.858,
24
- "eval_steps_per_second": 0.321,
25
  "step": 19
26
  },
27
  {
28
  "epoch": 1.0389610389610389,
29
- "grad_norm": 7.520359039306641,
30
  "learning_rate": 3.4482758620689657e-05,
31
- "loss": 1.6044,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.5584415584415585,
36
- "grad_norm": 8.020193099975586,
37
  "learning_rate": 4.9804687500000004e-05,
38
- "loss": 1.415,
39
  "step": 30
40
  },
41
  {
42
  "epoch": 1.974025974025974,
43
- "eval_accuracy": 0.5147058823529411,
44
- "eval_loss": 1.2593767642974854,
45
- "eval_runtime": 28.328,
46
- "eval_samples_per_second": 4.801,
47
- "eval_steps_per_second": 0.318,
48
  "step": 38
49
  },
50
  {
51
  "epoch": 2.0779220779220777,
52
- "grad_norm": 9.726988792419434,
53
  "learning_rate": 4.78515625e-05,
54
- "loss": 1.2695,
55
  "step": 40
56
  },
57
  {
58
  "epoch": 2.5974025974025974,
59
- "grad_norm": 10.028353691101074,
60
  "learning_rate": 4.58984375e-05,
61
- "loss": 1.06,
62
  "step": 50
63
  },
64
  {
65
  "epoch": 2.961038961038961,
66
- "eval_accuracy": 0.6470588235294118,
67
- "eval_loss": 1.0316276550292969,
68
- "eval_runtime": 28.6395,
69
- "eval_samples_per_second": 4.749,
70
- "eval_steps_per_second": 0.314,
71
  "step": 57
72
  },
73
  {
74
  "epoch": 3.116883116883117,
75
- "grad_norm": 11.8433837890625,
76
  "learning_rate": 4.3945312500000005e-05,
77
- "loss": 1.0001,
78
  "step": 60
79
  },
80
  {
81
  "epoch": 3.6363636363636362,
82
- "grad_norm": 9.538862228393555,
83
  "learning_rate": 4.1992187500000003e-05,
84
- "loss": 0.8808,
85
  "step": 70
86
  },
87
  {
88
  "epoch": 4.0,
89
- "eval_accuracy": 0.625,
90
- "eval_loss": 1.0088493824005127,
91
- "eval_runtime": 28.6257,
92
- "eval_samples_per_second": 4.751,
93
- "eval_steps_per_second": 0.314,
94
  "step": 77
95
  },
96
  {
97
  "epoch": 4.1558441558441555,
98
- "grad_norm": 13.59125804901123,
99
  "learning_rate": 4.00390625e-05,
100
- "loss": 0.9073,
101
  "step": 80
102
  },
103
  {
104
  "epoch": 4.675324675324675,
105
- "grad_norm": 10.73096752166748,
106
  "learning_rate": 3.80859375e-05,
107
- "loss": 0.7646,
108
  "step": 90
109
  },
110
  {
111
  "epoch": 4.987012987012987,
112
- "eval_accuracy": 0.6985294117647058,
113
- "eval_loss": 0.8210764527320862,
114
- "eval_runtime": 28.1022,
115
- "eval_samples_per_second": 4.839,
116
- "eval_steps_per_second": 0.32,
117
  "step": 96
118
  },
119
  {
120
  "epoch": 5.194805194805195,
121
- "grad_norm": 9.227697372436523,
122
  "learning_rate": 3.6132812500000005e-05,
123
- "loss": 0.7113,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 5.714285714285714,
128
- "grad_norm": 9.764923095703125,
129
  "learning_rate": 3.41796875e-05,
130
- "loss": 0.6798,
131
  "step": 110
132
  },
133
  {
134
  "epoch": 5.974025974025974,
135
- "eval_accuracy": 0.7132352941176471,
136
- "eval_loss": 0.7382610440254211,
137
- "eval_runtime": 28.5068,
138
- "eval_samples_per_second": 4.771,
139
- "eval_steps_per_second": 0.316,
140
  "step": 115
141
  },
142
  {
143
  "epoch": 6.233766233766234,
144
- "grad_norm": 6.136936187744141,
145
  "learning_rate": 3.22265625e-05,
146
- "loss": 0.6017,
147
  "step": 120
148
  },
149
  {
150
  "epoch": 6.753246753246753,
151
- "grad_norm": 14.086496353149414,
152
  "learning_rate": 3.02734375e-05,
153
- "loss": 0.554,
154
  "step": 130
155
  },
156
  {
157
  "epoch": 6.961038961038961,
158
- "eval_accuracy": 0.7573529411764706,
159
- "eval_loss": 0.6476972103118896,
160
- "eval_runtime": 28.4917,
161
- "eval_samples_per_second": 4.773,
162
- "eval_steps_per_second": 0.316,
163
  "step": 134
164
  },
165
  {
166
  "epoch": 7.2727272727272725,
167
- "grad_norm": 10.119462966918945,
168
  "learning_rate": 2.83203125e-05,
169
- "loss": 0.581,
170
  "step": 140
171
  },
172
  {
173
  "epoch": 7.792207792207792,
174
- "grad_norm": 9.049328804016113,
175
  "learning_rate": 2.63671875e-05,
176
- "loss": 0.5358,
177
  "step": 150
178
  },
179
  {
180
  "epoch": 8.0,
181
- "eval_accuracy": 0.7647058823529411,
182
- "eval_loss": 0.5824333429336548,
183
- "eval_runtime": 28.7788,
184
- "eval_samples_per_second": 4.726,
185
- "eval_steps_per_second": 0.313,
186
  "step": 154
187
  },
188
  {
189
  "epoch": 8.311688311688311,
190
- "grad_norm": 14.884687423706055,
191
  "learning_rate": 2.44140625e-05,
192
- "loss": 0.4674,
193
  "step": 160
194
  },
195
  {
196
  "epoch": 8.831168831168831,
197
- "grad_norm": 11.825162887573242,
198
  "learning_rate": 2.24609375e-05,
199
- "loss": 0.4689,
200
  "step": 170
201
  },
202
  {
203
  "epoch": 8.987012987012987,
204
- "eval_accuracy": 0.7794117647058824,
205
- "eval_loss": 0.5570951104164124,
206
- "eval_runtime": 28.5562,
207
- "eval_samples_per_second": 4.763,
208
- "eval_steps_per_second": 0.315,
209
  "step": 173
210
  },
211
  {
212
  "epoch": 9.35064935064935,
213
- "grad_norm": 9.966752052307129,
214
  "learning_rate": 2.05078125e-05,
215
- "loss": 0.414,
216
  "step": 180
217
  },
218
  {
219
  "epoch": 9.87012987012987,
220
- "grad_norm": 10.21314525604248,
221
  "learning_rate": 1.85546875e-05,
222
- "loss": 0.4217,
223
  "step": 190
224
  },
225
  {
226
  "epoch": 9.974025974025974,
227
- "eval_accuracy": 0.7867647058823529,
228
- "eval_loss": 0.5505570769309998,
229
- "eval_runtime": 28.6499,
230
- "eval_samples_per_second": 4.747,
231
- "eval_steps_per_second": 0.314,
232
  "step": 192
233
  },
234
  {
235
  "epoch": 10.38961038961039,
236
- "grad_norm": 8.933082580566406,
237
  "learning_rate": 1.66015625e-05,
238
- "loss": 0.3979,
239
  "step": 200
240
  },
241
  {
242
  "epoch": 10.909090909090908,
243
- "grad_norm": 12.334419250488281,
244
  "learning_rate": 1.4648437500000001e-05,
245
- "loss": 0.4063,
246
  "step": 210
247
  },
248
  {
249
  "epoch": 10.96103896103896,
250
- "eval_accuracy": 0.8235294117647058,
251
- "eval_loss": 0.4987229108810425,
252
- "eval_runtime": 28.2377,
253
- "eval_samples_per_second": 4.816,
254
- "eval_steps_per_second": 0.319,
255
  "step": 211
256
  },
257
  {
258
  "epoch": 11.428571428571429,
259
- "grad_norm": 10.463237762451172,
260
  "learning_rate": 1.2695312500000001e-05,
261
- "loss": 0.3799,
262
  "step": 220
263
  },
264
  {
265
  "epoch": 11.948051948051948,
266
- "grad_norm": 9.576591491699219,
267
  "learning_rate": 1.0742187500000001e-05,
268
- "loss": 0.3827,
269
  "step": 230
270
  },
271
  {
272
  "epoch": 12.0,
273
- "eval_accuracy": 0.8088235294117647,
274
- "eval_loss": 0.47928401827812195,
275
- "eval_runtime": 29.299,
276
- "eval_samples_per_second": 4.642,
277
- "eval_steps_per_second": 0.307,
278
  "step": 231
279
  },
280
  {
281
  "epoch": 12.467532467532468,
282
- "grad_norm": 7.557727813720703,
283
  "learning_rate": 8.789062500000001e-06,
284
- "loss": 0.3419,
285
  "step": 240
286
  },
287
  {
288
  "epoch": 12.987012987012987,
289
- "grad_norm": 7.865088939666748,
290
  "learning_rate": 6.8359375e-06,
291
- "loss": 0.3095,
292
  "step": 250
293
  },
294
  {
295
  "epoch": 12.987012987012987,
296
- "eval_accuracy": 0.8014705882352942,
297
- "eval_loss": 0.47241097688674927,
298
- "eval_runtime": 28.3959,
299
- "eval_samples_per_second": 4.789,
300
- "eval_steps_per_second": 0.317,
301
  "step": 250
302
  },
303
  {
304
  "epoch": 13.506493506493506,
305
- "grad_norm": 15.5108060836792,
306
  "learning_rate": 4.8828125e-06,
307
- "loss": 0.3521,
308
  "step": 260
309
  },
310
  {
311
  "epoch": 13.974025974025974,
312
- "eval_accuracy": 0.8529411764705882,
313
- "eval_loss": 0.4389486610889435,
314
- "eval_runtime": 28.274,
315
- "eval_samples_per_second": 4.81,
316
- "eval_steps_per_second": 0.318,
317
  "step": 269
318
  },
319
  {
320
  "epoch": 14.025974025974026,
321
- "grad_norm": 9.100951194763184,
322
  "learning_rate": 2.9296875e-06,
323
- "loss": 0.3252,
324
  "step": 270
325
  },
326
  {
327
  "epoch": 14.545454545454545,
328
- "grad_norm": 11.608354568481445,
329
  "learning_rate": 9.765625e-07,
330
- "loss": 0.3397,
331
  "step": 280
332
  },
333
  {
334
  "epoch": 14.805194805194805,
335
- "eval_accuracy": 0.8455882352941176,
336
- "eval_loss": 0.43828412890434265,
337
- "eval_runtime": 28.4264,
338
- "eval_samples_per_second": 4.784,
339
- "eval_steps_per_second": 0.317,
340
  "step": 285
341
  },
342
  {
343
  "epoch": 14.805194805194805,
344
  "step": 285,
345
  "total_flos": 9.04681758989353e+16,
346
- "train_loss": 0.6850269221422965,
347
- "train_runtime": 4202.2024,
348
- "train_samples_per_second": 4.369,
349
- "train_steps_per_second": 0.068
350
  }
351
  ],
352
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9117647058823529,
3
  "best_model_checkpoint": "deit-tiny-patch16-224-finetuned-papsmear/checkpoint-269",
4
  "epoch": 14.805194805194805,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.5194805194805194,
13
+ "grad_norm": 11.024985313415527,
14
  "learning_rate": 1.7241379310344828e-05,
15
+ "loss": 0.3521,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.987012987012987,
20
+ "eval_accuracy": 0.8235294117647058,
21
+ "eval_loss": 0.4595417082309723,
22
+ "eval_runtime": 28.5401,
23
+ "eval_samples_per_second": 4.765,
24
+ "eval_steps_per_second": 0.315,
25
  "step": 19
26
  },
27
  {
28
  "epoch": 1.0389610389610389,
29
+ "grad_norm": 14.813636779785156,
30
  "learning_rate": 3.4482758620689657e-05,
31
+ "loss": 0.3115,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 1.5584415584415585,
36
+ "grad_norm": 16.087106704711914,
37
  "learning_rate": 4.9804687500000004e-05,
38
+ "loss": 0.335,
39
  "step": 30
40
  },
41
  {
42
  "epoch": 1.974025974025974,
43
+ "eval_accuracy": 0.8602941176470589,
44
+ "eval_loss": 0.4491410553455353,
45
+ "eval_runtime": 27.9762,
46
+ "eval_samples_per_second": 4.861,
47
+ "eval_steps_per_second": 0.322,
48
  "step": 38
49
  },
50
  {
51
  "epoch": 2.0779220779220777,
52
+ "grad_norm": 19.62188720703125,
53
  "learning_rate": 4.78515625e-05,
54
+ "loss": 0.4021,
55
  "step": 40
56
  },
57
  {
58
  "epoch": 2.5974025974025974,
59
+ "grad_norm": 12.998429298400879,
60
  "learning_rate": 4.58984375e-05,
61
+ "loss": 0.3248,
62
  "step": 50
63
  },
64
  {
65
  "epoch": 2.961038961038961,
66
+ "eval_accuracy": 0.875,
67
+ "eval_loss": 0.4195605218410492,
68
+ "eval_runtime": 28.3986,
69
+ "eval_samples_per_second": 4.789,
70
+ "eval_steps_per_second": 0.317,
71
  "step": 57
72
  },
73
  {
74
  "epoch": 3.116883116883117,
75
+ "grad_norm": 8.330426216125488,
76
  "learning_rate": 4.3945312500000005e-05,
77
+ "loss": 0.3613,
78
  "step": 60
79
  },
80
  {
81
  "epoch": 3.6363636363636362,
82
+ "grad_norm": 10.42120361328125,
83
  "learning_rate": 4.1992187500000003e-05,
84
+ "loss": 0.3271,
85
  "step": 70
86
  },
87
  {
88
  "epoch": 4.0,
89
+ "eval_accuracy": 0.8014705882352942,
90
+ "eval_loss": 0.5466737151145935,
91
+ "eval_runtime": 28.1607,
92
+ "eval_samples_per_second": 4.829,
93
+ "eval_steps_per_second": 0.32,
94
  "step": 77
95
  },
96
  {
97
  "epoch": 4.1558441558441555,
98
+ "grad_norm": 14.403099060058594,
99
  "learning_rate": 4.00390625e-05,
100
+ "loss": 0.3451,
101
  "step": 80
102
  },
103
  {
104
  "epoch": 4.675324675324675,
105
+ "grad_norm": 18.443952560424805,
106
  "learning_rate": 3.80859375e-05,
107
+ "loss": 0.3286,
108
  "step": 90
109
  },
110
  {
111
  "epoch": 4.987012987012987,
112
+ "eval_accuracy": 0.8161764705882353,
113
+ "eval_loss": 0.4768396317958832,
114
+ "eval_runtime": 28.7873,
115
+ "eval_samples_per_second": 4.724,
116
+ "eval_steps_per_second": 0.313,
117
  "step": 96
118
  },
119
  {
120
  "epoch": 5.194805194805195,
121
+ "grad_norm": 8.146430969238281,
122
  "learning_rate": 3.6132812500000005e-05,
123
+ "loss": 0.2577,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 5.714285714285714,
128
+ "grad_norm": 7.037234306335449,
129
  "learning_rate": 3.41796875e-05,
130
+ "loss": 0.2854,
131
  "step": 110
132
  },
133
  {
134
  "epoch": 5.974025974025974,
135
+ "eval_accuracy": 0.8676470588235294,
136
+ "eval_loss": 0.41474443674087524,
137
+ "eval_runtime": 28.6719,
138
+ "eval_samples_per_second": 4.743,
139
+ "eval_steps_per_second": 0.314,
140
  "step": 115
141
  },
142
  {
143
  "epoch": 6.233766233766234,
144
+ "grad_norm": 10.498587608337402,
145
  "learning_rate": 3.22265625e-05,
146
+ "loss": 0.2295,
147
  "step": 120
148
  },
149
  {
150
  "epoch": 6.753246753246753,
151
+ "grad_norm": 13.865105628967285,
152
  "learning_rate": 3.02734375e-05,
153
+ "loss": 0.2291,
154
  "step": 130
155
  },
156
  {
157
  "epoch": 6.961038961038961,
158
+ "eval_accuracy": 0.8676470588235294,
159
+ "eval_loss": 0.4321264624595642,
160
+ "eval_runtime": 28.1356,
161
+ "eval_samples_per_second": 4.834,
162
+ "eval_steps_per_second": 0.32,
163
  "step": 134
164
  },
165
  {
166
  "epoch": 7.2727272727272725,
167
+ "grad_norm": 14.981769561767578,
168
  "learning_rate": 2.83203125e-05,
169
+ "loss": 0.2356,
170
  "step": 140
171
  },
172
  {
173
  "epoch": 7.792207792207792,
174
+ "grad_norm": 17.98302459716797,
175
  "learning_rate": 2.63671875e-05,
176
+ "loss": 0.2619,
177
  "step": 150
178
  },
179
  {
180
  "epoch": 8.0,
181
+ "eval_accuracy": 0.8235294117647058,
182
+ "eval_loss": 0.5726210474967957,
183
+ "eval_runtime": 28.5909,
184
+ "eval_samples_per_second": 4.757,
185
+ "eval_steps_per_second": 0.315,
186
  "step": 154
187
  },
188
  {
189
  "epoch": 8.311688311688311,
190
+ "grad_norm": 17.48947525024414,
191
  "learning_rate": 2.44140625e-05,
192
+ "loss": 0.2315,
193
  "step": 160
194
  },
195
  {
196
  "epoch": 8.831168831168831,
197
+ "grad_norm": 18.059572219848633,
198
  "learning_rate": 2.24609375e-05,
199
+ "loss": 0.2196,
200
  "step": 170
201
  },
202
  {
203
  "epoch": 8.987012987012987,
204
+ "eval_accuracy": 0.8676470588235294,
205
+ "eval_loss": 0.4344201982021332,
206
+ "eval_runtime": 28.8842,
207
+ "eval_samples_per_second": 4.708,
208
+ "eval_steps_per_second": 0.312,
209
  "step": 173
210
  },
211
  {
212
  "epoch": 9.35064935064935,
213
+ "grad_norm": 13.211594581604004,
214
  "learning_rate": 2.05078125e-05,
215
+ "loss": 0.22,
216
  "step": 180
217
  },
218
  {
219
  "epoch": 9.87012987012987,
220
+ "grad_norm": 14.067967414855957,
221
  "learning_rate": 1.85546875e-05,
222
+ "loss": 0.2116,
223
  "step": 190
224
  },
225
  {
226
  "epoch": 9.974025974025974,
227
+ "eval_accuracy": 0.875,
228
+ "eval_loss": 0.38092589378356934,
229
+ "eval_runtime": 28.2538,
230
+ "eval_samples_per_second": 4.814,
231
+ "eval_steps_per_second": 0.319,
232
  "step": 192
233
  },
234
  {
235
  "epoch": 10.38961038961039,
236
+ "grad_norm": 8.458534240722656,
237
  "learning_rate": 1.66015625e-05,
238
+ "loss": 0.1843,
239
  "step": 200
240
  },
241
  {
242
  "epoch": 10.909090909090908,
243
+ "grad_norm": 11.62593936920166,
244
  "learning_rate": 1.4648437500000001e-05,
245
+ "loss": 0.1913,
246
  "step": 210
247
  },
248
  {
249
  "epoch": 10.96103896103896,
250
+ "eval_accuracy": 0.8602941176470589,
251
+ "eval_loss": 0.37573114037513733,
252
+ "eval_runtime": 28.2614,
253
+ "eval_samples_per_second": 4.812,
254
+ "eval_steps_per_second": 0.318,
255
  "step": 211
256
  },
257
  {
258
  "epoch": 11.428571428571429,
259
+ "grad_norm": 15.429533958435059,
260
  "learning_rate": 1.2695312500000001e-05,
261
+ "loss": 0.1654,
262
  "step": 220
263
  },
264
  {
265
  "epoch": 11.948051948051948,
266
+ "grad_norm": 8.531671524047852,
267
  "learning_rate": 1.0742187500000001e-05,
268
+ "loss": 0.1604,
269
  "step": 230
270
  },
271
  {
272
  "epoch": 12.0,
273
+ "eval_accuracy": 0.8897058823529411,
274
+ "eval_loss": 0.35508498549461365,
275
+ "eval_runtime": 28.7628,
276
+ "eval_samples_per_second": 4.728,
277
+ "eval_steps_per_second": 0.313,
278
  "step": 231
279
  },
280
  {
281
  "epoch": 12.467532467532468,
282
+ "grad_norm": 7.988156318664551,
283
  "learning_rate": 8.789062500000001e-06,
284
+ "loss": 0.1558,
285
  "step": 240
286
  },
287
  {
288
  "epoch": 12.987012987012987,
289
+ "grad_norm": 6.887962341308594,
290
  "learning_rate": 6.8359375e-06,
291
+ "loss": 0.1307,
292
  "step": 250
293
  },
294
  {
295
  "epoch": 12.987012987012987,
296
+ "eval_accuracy": 0.8970588235294118,
297
+ "eval_loss": 0.3330402374267578,
298
+ "eval_runtime": 29.009,
299
+ "eval_samples_per_second": 4.688,
300
+ "eval_steps_per_second": 0.31,
301
  "step": 250
302
  },
303
  {
304
  "epoch": 13.506493506493506,
305
+ "grad_norm": 7.4578070640563965,
306
  "learning_rate": 4.8828125e-06,
307
+ "loss": 0.1425,
308
  "step": 260
309
  },
310
  {
311
  "epoch": 13.974025974025974,
312
+ "eval_accuracy": 0.9117647058823529,
313
+ "eval_loss": 0.34208229184150696,
314
+ "eval_runtime": 28.868,
315
+ "eval_samples_per_second": 4.711,
316
+ "eval_steps_per_second": 0.312,
317
  "step": 269
318
  },
319
  {
320
  "epoch": 14.025974025974026,
321
+ "grad_norm": 13.509991645812988,
322
  "learning_rate": 2.9296875e-06,
323
+ "loss": 0.1368,
324
  "step": 270
325
  },
326
  {
327
  "epoch": 14.545454545454545,
328
+ "grad_norm": 9.896185874938965,
329
  "learning_rate": 9.765625e-07,
330
+ "loss": 0.141,
331
  "step": 280
332
  },
333
  {
334
  "epoch": 14.805194805194805,
335
+ "eval_accuracy": 0.9117647058823529,
336
+ "eval_loss": 0.3409328758716583,
337
+ "eval_runtime": 28.479,
338
+ "eval_samples_per_second": 4.775,
339
+ "eval_steps_per_second": 0.316,
340
  "step": 285
341
  },
342
  {
343
  "epoch": 14.805194805194805,
344
  "step": 285,
345
  "total_flos": 9.04681758989353e+16,
346
+ "train_loss": 0.2430994153022766,
347
+ "train_runtime": 4010.606,
348
+ "train_samples_per_second": 4.578,
349
+ "train_steps_per_second": 0.071
350
  }
351
  ],
352
  "logging_steps": 10,