eugenecamus committed on
Commit
2adf64c
1 Parent(s): e5e819e

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9924812030075187,
4
- "eval_loss": 0.04292627424001694,
5
- "eval_runtime": 0.9184,
6
- "eval_samples_per_second": 144.81,
7
- "eval_steps_per_second": 18.509,
8
- "train_loss": 0.3968321686524611,
9
- "train_runtime": 71.285,
10
- "train_samples_per_second": 72.526,
11
- "train_steps_per_second": 9.118
12
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.9699248120300752,
4
+ "eval_loss": 0.10140984505414963,
5
+ "eval_runtime": 1.0066,
6
+ "eval_samples_per_second": 132.132,
7
+ "eval_steps_per_second": 16.889,
8
+ "train_loss": 0.3958505061039558,
9
+ "train_runtime": 122.678,
10
+ "train_samples_per_second": 42.143,
11
+ "train_steps_per_second": 5.298
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.9924812030075187,
4
- "eval_loss": 0.04292627424001694,
5
- "eval_runtime": 0.9184,
6
- "eval_samples_per_second": 144.81,
7
- "eval_steps_per_second": 18.509
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.9699248120300752,
4
+ "eval_loss": 0.10140984505414963,
5
+ "eval_runtime": 1.0066,
6
+ "eval_samples_per_second": 132.132,
7
+ "eval_steps_per_second": 16.889
8
  }
runs/May28_00-01-33_my-fastai-instance/events.out.tfevents.1653696263.my-fastai-instance.7560.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9ce4253b488b654778db2aa53b74775187e51d517f5090a09ad9ae348fa48a1
3
+ size 363
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.3968321686524611,
4
- "train_runtime": 71.285,
5
- "train_samples_per_second": 72.526,
6
- "train_steps_per_second": 9.118
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.3958505061039558,
4
+ "train_runtime": 122.678,
5
+ "train_samples_per_second": 42.143,
6
+ "train_steps_per_second": 5.298
7
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.04292627424001694,
3
  "best_model_checkpoint": "./beans_outputs/checkpoint-650",
4
  "epoch": 5.0,
5
  "global_step": 650,
@@ -10,446 +10,446 @@
10
  {
11
  "epoch": 0.08,
12
  "learning_rate": 0.0003076923076923077,
13
- "loss": 1.0843,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.15,
18
  "learning_rate": 0.0006153846153846154,
19
- "loss": 1.074,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.23,
24
  "learning_rate": 0.0009230769230769232,
25
- "loss": 1.0582,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.31,
30
  "learning_rate": 0.0012307692307692308,
31
- "loss": 0.9178,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.38,
36
  "learning_rate": 0.0015384615384615387,
37
- "loss": 0.8364,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.46,
42
  "learning_rate": 0.0018461538461538463,
43
- "loss": 0.5077,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.54,
48
  "learning_rate": 0.0019996395276708854,
49
- "loss": 0.4777,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.62,
54
  "learning_rate": 0.00199675730813421,
55
- "loss": 0.6303,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.69,
60
  "learning_rate": 0.0019910011792459085,
61
- "loss": 0.558,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.77,
66
  "learning_rate": 0.0019823877374156645,
67
- "loss": 0.5047,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.85,
72
  "learning_rate": 0.001970941817426052,
73
- "loss": 0.5314,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.92,
78
  "learning_rate": 0.0019566964208274254,
79
- "loss": 0.6402,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 1.0,
84
  "learning_rate": 0.0019396926207859084,
85
- "loss": 0.7703,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 1.0,
90
- "eval_accuracy": 0.5263157894736842,
91
- "eval_loss": 1.22383451461792,
92
- "eval_runtime": 0.797,
93
- "eval_samples_per_second": 166.882,
94
- "eval_steps_per_second": 21.331,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.08,
99
  "learning_rate": 0.0019199794436588243,
100
- "loss": 0.5088,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.15,
105
  "learning_rate": 0.0018976137276390142,
106
- "loss": 0.45,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.23,
111
  "learning_rate": 0.0018726599588756144,
112
- "loss": 0.5657,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.31,
117
  "learning_rate": 0.001845190085543795,
118
- "loss": 0.4771,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.38,
123
  "learning_rate": 0.0018152833103995444,
124
- "loss": 0.5755,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.46,
129
  "learning_rate": 0.0017830258624176223,
130
- "loss": 0.414,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.54,
135
  "learning_rate": 0.001748510748171101,
136
- "loss": 0.217,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.62,
141
  "learning_rate": 0.0017118374836693406,
142
- "loss": 0.4516,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.69,
147
  "learning_rate": 0.0016731118074275702,
148
- "loss": 0.533,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.77,
153
  "learning_rate": 0.0016324453755953772,
154
- "loss": 0.4964,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.85,
159
  "learning_rate": 0.001589955440023123,
160
- "loss": 0.3408,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.92,
165
  "learning_rate": 0.0015457645101945045,
166
- "loss": 0.3039,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 2.0,
171
  "learning_rate": 0.0015,
172
- "loss": 0.4905,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 2.0,
177
- "eval_accuracy": 0.8270676691729323,
178
- "eval_loss": 0.5192657709121704,
179
- "eval_runtime": 0.7485,
180
- "eval_samples_per_second": 177.694,
181
- "eval_steps_per_second": 22.713,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 2.08,
186
  "learning_rate": 0.0014527938603696375,
187
- "loss": 0.6346,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 2.15,
192
  "learning_rate": 0.0014042821988243049,
193
- "loss": 0.4142,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 2.23,
198
  "learning_rate": 0.0013546048870425357,
199
- "loss": 0.3863,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 2.31,
204
  "learning_rate": 0.0013039051575742468,
205
- "loss": 0.2966,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 2.38,
210
  "learning_rate": 0.0012523291908642217,
211
- "loss": 0.49,
212
  "step": 310
213
  },
214
  {
215
  "epoch": 2.46,
216
  "learning_rate": 0.0012000256937760446,
217
- "loss": 0.2886,
218
  "step": 320
219
  },
220
  {
221
  "epoch": 2.54,
222
  "learning_rate": 0.001147145470831716,
223
- "loss": 0.4434,
224
  "step": 330
225
  },
226
  {
227
  "epoch": 2.62,
228
  "learning_rate": 0.0010938409894031794,
229
- "loss": 0.2125,
230
  "step": 340
231
  },
232
  {
233
  "epoch": 2.69,
234
  "learning_rate": 0.0010402659401094153,
235
- "loss": 0.2523,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.77,
240
  "learning_rate": 0.0009865747936866028,
241
- "loss": 0.2735,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.85,
246
  "learning_rate": 0.0009329223556089975,
247
- "loss": 0.27,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.92,
252
  "learning_rate": 0.0008794633197446771,
253
- "loss": 0.2445,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 3.0,
258
  "learning_rate": 0.0008263518223330697,
259
- "loss": 0.4793,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 3.0,
264
- "eval_accuracy": 0.9699248120300752,
265
- "eval_loss": 0.14211735129356384,
266
- "eval_runtime": 0.7484,
267
- "eval_samples_per_second": 177.701,
268
- "eval_steps_per_second": 22.714,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 3.08,
273
  "learning_rate": 0.0007737409975702781,
274
- "loss": 0.2375,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 3.15,
279
  "learning_rate": 0.0007217825360835474,
280
- "loss": 0.3971,
281
  "step": 410
282
  },
283
  {
284
  "epoch": 3.23,
285
  "learning_rate": 0.0006706262475679205,
286
- "loss": 0.3654,
287
  "step": 420
288
  },
289
  {
290
  "epoch": 3.31,
291
  "learning_rate": 0.0006204196288461038,
292
- "loss": 0.2186,
293
  "step": 430
294
  },
295
  {
296
  "epoch": 3.38,
297
  "learning_rate": 0.0005713074385969457,
298
- "loss": 0.2977,
299
  "step": 440
300
  },
301
  {
302
  "epoch": 3.46,
303
  "learning_rate": 0.0005234312799786921,
304
- "loss": 0.1177,
305
  "step": 450
306
  },
307
  {
308
  "epoch": 3.54,
309
  "learning_rate": 0.00047692919235042255,
310
- "loss": 0.4466,
311
  "step": 460
312
  },
313
  {
314
  "epoch": 3.62,
315
  "learning_rate": 0.0004319352532688443,
316
- "loss": 0.2118,
317
  "step": 470
318
  },
319
  {
320
  "epoch": 3.69,
321
  "learning_rate": 0.0003885791919079877,
322
- "loss": 0.2298,
323
  "step": 480
324
  },
325
  {
326
  "epoch": 3.77,
327
  "learning_rate": 0.0003469860150164151,
328
- "loss": 0.3758,
329
  "step": 490
330
  },
331
  {
332
  "epoch": 3.85,
333
  "learning_rate": 0.00030727564649040063,
334
- "loss": 0.2384,
335
  "step": 500
336
  },
337
  {
338
  "epoch": 3.92,
339
  "learning_rate": 0.00026956258160229697,
340
- "loss": 0.1422,
341
  "step": 510
342
  },
343
  {
344
  "epoch": 4.0,
345
  "learning_rate": 0.0002339555568810221,
346
- "loss": 0.2986,
347
  "step": 520
348
  },
349
  {
350
  "epoch": 4.0,
351
- "eval_accuracy": 0.9624060150375939,
352
- "eval_loss": 0.07603178173303604,
353
- "eval_runtime": 0.7494,
354
- "eval_samples_per_second": 177.482,
355
- "eval_steps_per_second": 22.686,
356
  "step": 520
357
  },
358
  {
359
  "epoch": 4.08,
360
  "learning_rate": 0.00020055723659649904,
361
- "loss": 0.1728,
362
  "step": 530
363
  },
364
  {
365
  "epoch": 4.15,
366
  "learning_rate": 0.00016946391675198836,
367
- "loss": 0.1329,
368
  "step": 540
369
  },
370
  {
371
  "epoch": 4.23,
372
  "learning_rate": 0.00014076524743778318,
373
- "loss": 0.1473,
374
  "step": 550
375
  },
376
  {
377
  "epoch": 4.31,
378
  "learning_rate": 0.0001145439743467902,
379
- "loss": 0.1782,
380
  "step": 560
381
  },
382
  {
383
  "epoch": 4.38,
384
  "learning_rate": 9.08757001972762e-05,
385
- "loss": 0.1615,
386
  "step": 570
387
  },
388
  {
389
  "epoch": 4.46,
390
  "learning_rate": 6.98286667506618e-05,
391
- "loss": 0.2641,
392
  "step": 580
393
  },
394
  {
395
  "epoch": 4.54,
396
  "learning_rate": 5.146355805285452e-05,
397
- "loss": 0.167,
398
  "step": 590
399
  },
400
  {
401
  "epoch": 4.62,
402
  "learning_rate": 3.5833325466437696e-05,
403
- "loss": 0.1689,
404
  "step": 600
405
  },
406
  {
407
  "epoch": 4.69,
408
  "learning_rate": 2.2983034998182995e-05,
409
- "loss": 0.1596,
410
  "step": 610
411
  },
412
  {
413
  "epoch": 4.77,
414
  "learning_rate": 1.2949737362087154e-05,
415
- "loss": 0.0924,
416
  "step": 620
417
  },
418
  {
419
  "epoch": 4.85,
420
  "learning_rate": 5.7623611525721155e-06,
421
- "loss": 0.1395,
422
  "step": 630
423
  },
424
  {
425
  "epoch": 4.92,
426
  "learning_rate": 1.4416294358582382e-06,
427
- "loss": 0.1392,
428
  "step": 640
429
  },
430
  {
431
  "epoch": 5.0,
432
  "learning_rate": 0.0,
433
- "loss": 0.1927,
434
  "step": 650
435
  },
436
  {
437
  "epoch": 5.0,
438
- "eval_accuracy": 0.9924812030075187,
439
- "eval_loss": 0.04292627424001694,
440
- "eval_runtime": 0.7439,
441
- "eval_samples_per_second": 178.791,
442
- "eval_steps_per_second": 22.853,
443
  "step": 650
444
  },
445
  {
446
  "epoch": 5.0,
447
  "step": 650,
448
  "total_flos": 1.0979660327860224e+17,
449
- "train_loss": 0.3968321686524611,
450
- "train_runtime": 71.285,
451
- "train_samples_per_second": 72.526,
452
- "train_steps_per_second": 9.118
453
  }
454
  ],
455
  "max_steps": 650,
 
1
  {
2
+ "best_metric": 0.10140984505414963,
3
  "best_model_checkpoint": "./beans_outputs/checkpoint-650",
4
  "epoch": 5.0,
5
  "global_step": 650,
 
10
  {
11
  "epoch": 0.08,
12
  "learning_rate": 0.0003076923076923077,
13
+ "loss": 1.0889,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.15,
18
  "learning_rate": 0.0006153846153846154,
19
+ "loss": 1.0728,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.23,
24
  "learning_rate": 0.0009230769230769232,
25
+ "loss": 1.0675,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.31,
30
  "learning_rate": 0.0012307692307692308,
31
+ "loss": 0.9244,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.38,
36
  "learning_rate": 0.0015384615384615387,
37
+ "loss": 0.8006,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.46,
42
  "learning_rate": 0.0018461538461538463,
43
+ "loss": 0.7104,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.54,
48
  "learning_rate": 0.0019996395276708854,
49
+ "loss": 0.5492,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.62,
54
  "learning_rate": 0.00199675730813421,
55
+ "loss": 0.438,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.69,
60
  "learning_rate": 0.0019910011792459085,
61
+ "loss": 0.5676,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.77,
66
  "learning_rate": 0.0019823877374156645,
67
+ "loss": 0.4354,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.85,
72
  "learning_rate": 0.001970941817426052,
73
+ "loss": 0.3349,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.92,
78
  "learning_rate": 0.0019566964208274254,
79
+ "loss": 0.7846,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 1.0,
84
  "learning_rate": 0.0019396926207859084,
85
+ "loss": 0.9524,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 1.0,
90
+ "eval_accuracy": 0.8646616541353384,
91
+ "eval_loss": 0.28258681297302246,
92
+ "eval_runtime": 0.9861,
93
+ "eval_samples_per_second": 134.87,
94
+ "eval_steps_per_second": 17.239,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 1.08,
99
  "learning_rate": 0.0019199794436588243,
100
+ "loss": 0.5461,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 1.15,
105
  "learning_rate": 0.0018976137276390142,
106
+ "loss": 0.7203,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 1.23,
111
  "learning_rate": 0.0018726599588756144,
112
+ "loss": 0.4123,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 1.31,
117
  "learning_rate": 0.001845190085543795,
118
+ "loss": 0.5495,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 1.38,
123
  "learning_rate": 0.0018152833103995444,
124
+ "loss": 0.4613,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.46,
129
  "learning_rate": 0.0017830258624176223,
130
+ "loss": 0.4326,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.54,
135
  "learning_rate": 0.001748510748171101,
136
+ "loss": 0.3239,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.62,
141
  "learning_rate": 0.0017118374836693406,
142
+ "loss": 0.5905,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.69,
147
  "learning_rate": 0.0016731118074275702,
148
+ "loss": 0.5519,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.77,
153
  "learning_rate": 0.0016324453755953772,
154
+ "loss": 0.2935,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.85,
159
  "learning_rate": 0.001589955440023123,
160
+ "loss": 0.3501,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.92,
165
  "learning_rate": 0.0015457645101945045,
166
+ "loss": 0.4842,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 2.0,
171
  "learning_rate": 0.0015,
172
+ "loss": 0.3596,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 2.0,
177
+ "eval_accuracy": 0.9022556390977443,
178
+ "eval_loss": 0.22159069776535034,
179
+ "eval_runtime": 0.9658,
180
+ "eval_samples_per_second": 137.712,
181
+ "eval_steps_per_second": 17.602,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 2.08,
186
  "learning_rate": 0.0014527938603696375,
187
+ "loss": 0.578,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 2.15,
192
  "learning_rate": 0.0014042821988243049,
193
+ "loss": 0.3116,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 2.23,
198
  "learning_rate": 0.0013546048870425357,
199
+ "loss": 0.3378,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 2.31,
204
  "learning_rate": 0.0013039051575742468,
205
+ "loss": 0.3679,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 2.38,
210
  "learning_rate": 0.0012523291908642217,
211
+ "loss": 0.38,
212
  "step": 310
213
  },
214
  {
215
  "epoch": 2.46,
216
  "learning_rate": 0.0012000256937760446,
217
+ "loss": 0.3309,
218
  "step": 320
219
  },
220
  {
221
  "epoch": 2.54,
222
  "learning_rate": 0.001147145470831716,
223
+ "loss": 0.2248,
224
  "step": 330
225
  },
226
  {
227
  "epoch": 2.62,
228
  "learning_rate": 0.0010938409894031794,
229
+ "loss": 0.3261,
230
  "step": 340
231
  },
232
  {
233
  "epoch": 2.69,
234
  "learning_rate": 0.0010402659401094153,
235
+ "loss": 0.2697,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.77,
240
  "learning_rate": 0.0009865747936866028,
241
+ "loss": 0.3062,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.85,
246
  "learning_rate": 0.0009329223556089975,
247
+ "loss": 0.3551,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.92,
252
  "learning_rate": 0.0008794633197446771,
253
+ "loss": 0.2783,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 3.0,
258
  "learning_rate": 0.0008263518223330697,
259
+ "loss": 0.2419,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 3.0,
264
+ "eval_accuracy": 0.9473684210526315,
265
+ "eval_loss": 0.13237899541854858,
266
+ "eval_runtime": 0.9783,
267
+ "eval_samples_per_second": 135.946,
268
+ "eval_steps_per_second": 17.377,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 3.08,
273
  "learning_rate": 0.0007737409975702781,
274
+ "loss": 0.2691,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 3.15,
279
  "learning_rate": 0.0007217825360835474,
280
+ "loss": 0.3115,
281
  "step": 410
282
  },
283
  {
284
  "epoch": 3.23,
285
  "learning_rate": 0.0006706262475679205,
286
+ "loss": 0.2924,
287
  "step": 420
288
  },
289
  {
290
  "epoch": 3.31,
291
  "learning_rate": 0.0006204196288461038,
292
+ "loss": 0.2037,
293
  "step": 430
294
  },
295
  {
296
  "epoch": 3.38,
297
  "learning_rate": 0.0005713074385969457,
298
+ "loss": 0.2782,
299
  "step": 440
300
  },
301
  {
302
  "epoch": 3.46,
303
  "learning_rate": 0.0005234312799786921,
304
+ "loss": 0.1141,
305
  "step": 450
306
  },
307
  {
308
  "epoch": 3.54,
309
  "learning_rate": 0.00047692919235042255,
310
+ "loss": 0.504,
311
  "step": 460
312
  },
313
  {
314
  "epoch": 3.62,
315
  "learning_rate": 0.0004319352532688443,
316
+ "loss": 0.1391,
317
  "step": 470
318
  },
319
  {
320
  "epoch": 3.69,
321
  "learning_rate": 0.0003885791919079877,
322
+ "loss": 0.4455,
323
  "step": 480
324
  },
325
  {
326
  "epoch": 3.77,
327
  "learning_rate": 0.0003469860150164151,
328
+ "loss": 0.1956,
329
  "step": 490
330
  },
331
  {
332
  "epoch": 3.85,
333
  "learning_rate": 0.00030727564649040063,
334
+ "loss": 0.2292,
335
  "step": 500
336
  },
337
  {
338
  "epoch": 3.92,
339
  "learning_rate": 0.00026956258160229697,
340
+ "loss": 0.2151,
341
  "step": 510
342
  },
343
  {
344
  "epoch": 4.0,
345
  "learning_rate": 0.0002339555568810221,
346
+ "loss": 0.3248,
347
  "step": 520
348
  },
349
  {
350
  "epoch": 4.0,
351
+ "eval_accuracy": 0.9699248120300752,
352
+ "eval_loss": 0.11242649704217911,
353
+ "eval_runtime": 0.9749,
354
+ "eval_samples_per_second": 136.426,
355
+ "eval_steps_per_second": 17.438,
356
  "step": 520
357
  },
358
  {
359
  "epoch": 4.08,
360
  "learning_rate": 0.00020055723659649904,
361
+ "loss": 0.0799,
362
  "step": 530
363
  },
364
  {
365
  "epoch": 4.15,
366
  "learning_rate": 0.00016946391675198836,
367
+ "loss": 0.1854,
368
  "step": 540
369
  },
370
  {
371
  "epoch": 4.23,
372
  "learning_rate": 0.00014076524743778318,
373
+ "loss": 0.2312,
374
  "step": 550
375
  },
376
  {
377
  "epoch": 4.31,
378
  "learning_rate": 0.0001145439743467902,
379
+ "loss": 0.1739,
380
  "step": 560
381
  },
382
  {
383
  "epoch": 4.38,
384
  "learning_rate": 9.08757001972762e-05,
385
+ "loss": 0.1797,
386
  "step": 570
387
  },
388
  {
389
  "epoch": 4.46,
390
  "learning_rate": 6.98286667506618e-05,
391
+ "loss": 0.1376,
392
  "step": 580
393
  },
394
  {
395
  "epoch": 4.54,
396
  "learning_rate": 5.146355805285452e-05,
397
+ "loss": 0.11,
398
  "step": 590
399
  },
400
  {
401
  "epoch": 4.62,
402
  "learning_rate": 3.5833325466437696e-05,
403
+ "loss": 0.1737,
404
  "step": 600
405
  },
406
  {
407
  "epoch": 4.69,
408
  "learning_rate": 2.2983034998182995e-05,
409
+ "loss": 0.1018,
410
  "step": 610
411
  },
412
  {
413
  "epoch": 4.77,
414
  "learning_rate": 1.2949737362087154e-05,
415
+ "loss": 0.2031,
416
  "step": 620
417
  },
418
  {
419
  "epoch": 4.85,
420
  "learning_rate": 5.7623611525721155e-06,
421
+ "loss": 0.1868,
422
  "step": 630
423
  },
424
  {
425
  "epoch": 4.92,
426
  "learning_rate": 1.4416294358582382e-06,
427
+ "loss": 0.1781,
428
  "step": 640
429
  },
430
  {
431
  "epoch": 5.0,
432
  "learning_rate": 0.0,
433
+ "loss": 0.1557,
434
  "step": 650
435
  },
436
  {
437
  "epoch": 5.0,
438
+ "eval_accuracy": 0.9699248120300752,
439
+ "eval_loss": 0.10140984505414963,
440
+ "eval_runtime": 1.0018,
441
+ "eval_samples_per_second": 132.757,
442
+ "eval_steps_per_second": 16.969,
443
  "step": 650
444
  },
445
  {
446
  "epoch": 5.0,
447
  "step": 650,
448
  "total_flos": 1.0979660327860224e+17,
449
+ "train_loss": 0.3958505061039558,
450
+ "train_runtime": 122.678,
451
+ "train_samples_per_second": 42.143,
452
+ "train_steps_per_second": 5.298
453
  }
454
  ],
455
  "max_steps": 650,