eugenecamus commited on
Commit
3a834fe
1 Parent(s): 0c57bdd

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.9699248120300752,
4
- "eval_loss": 0.10140984505414963,
5
- "eval_runtime": 1.0066,
6
- "eval_samples_per_second": 132.132,
7
- "eval_steps_per_second": 16.889,
8
- "train_loss": 0.3958505061039558,
9
- "train_runtime": 122.678,
10
- "train_samples_per_second": 42.143,
11
- "train_steps_per_second": 5.298
12
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9022556390977443,
4
+ "eval_loss": 0.21882569789886475,
5
+ "eval_runtime": 0.8786,
6
+ "eval_samples_per_second": 151.372,
7
+ "eval_steps_per_second": 19.348,
8
+ "train_loss": 0.6012275494061984,
9
+ "train_runtime": 20.9633,
10
+ "train_samples_per_second": 49.324,
11
+ "train_steps_per_second": 6.201
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.9699248120300752,
4
- "eval_loss": 0.10140984505414963,
5
- "eval_runtime": 1.0066,
6
- "eval_samples_per_second": 132.132,
7
- "eval_steps_per_second": 16.889
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9022556390977443,
4
+ "eval_loss": 0.21882569789886475,
5
+ "eval_runtime": 0.8786,
6
+ "eval_samples_per_second": 151.372,
7
+ "eval_steps_per_second": 19.348
8
  }
runs/May31_10-43-13_ydupis-X570-AORUS-MASTER/events.out.tfevents.1654019265.ydupis-X570-AORUS-MASTER.15341.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f21a9173f13accf0a2461e5325a23646ad76588fd81df085164be6ce634fbf
3
+ size 363
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.3958505061039558,
4
- "train_runtime": 122.678,
5
- "train_samples_per_second": 42.143,
6
- "train_steps_per_second": 5.298
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.6012275494061984,
4
+ "train_runtime": 20.9633,
5
+ "train_samples_per_second": 49.324,
6
+ "train_steps_per_second": 6.201
7
  }
trainer_state.json CHANGED
@@ -1,460 +1,112 @@
1
  {
2
- "best_metric": 0.10140984505414963,
3
- "best_model_checkpoint": "./beans_outputs/checkpoint-650",
4
- "epoch": 5.0,
5
- "global_step": 650,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.08,
12
- "learning_rate": 0.0003076923076923077,
13
- "loss": 1.0889,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.15,
18
- "learning_rate": 0.0006153846153846154,
19
- "loss": 1.0728,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.23,
24
- "learning_rate": 0.0009230769230769232,
25
- "loss": 1.0675,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.31,
30
- "learning_rate": 0.0012307692307692308,
31
- "loss": 0.9244,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.38,
36
- "learning_rate": 0.0015384615384615387,
37
- "loss": 0.8006,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.46,
42
- "learning_rate": 0.0018461538461538463,
43
- "loss": 0.7104,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.54,
48
- "learning_rate": 0.0019996395276708854,
49
- "loss": 0.5492,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.62,
54
- "learning_rate": 0.00199675730813421,
55
- "loss": 0.438,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.69,
60
- "learning_rate": 0.0019910011792459085,
61
- "loss": 0.5676,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.77,
66
- "learning_rate": 0.0019823877374156645,
67
- "loss": 0.4354,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.85,
72
- "learning_rate": 0.001970941817426052,
73
- "loss": 0.3349,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.92,
78
- "learning_rate": 0.0019566964208274254,
79
- "loss": 0.7846,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 1.0,
84
- "learning_rate": 0.0019396926207859084,
85
- "loss": 0.9524,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 1.0,
90
- "eval_accuracy": 0.8646616541353384,
91
- "eval_loss": 0.28258681297302246,
92
- "eval_runtime": 0.9861,
93
- "eval_samples_per_second": 134.87,
94
- "eval_steps_per_second": 17.239,
95
- "step": 130
96
- },
97
- {
98
- "epoch": 1.08,
99
- "learning_rate": 0.0019199794436588243,
100
- "loss": 0.5461,
101
- "step": 140
102
- },
103
- {
104
- "epoch": 1.15,
105
- "learning_rate": 0.0018976137276390142,
106
- "loss": 0.7203,
107
- "step": 150
108
- },
109
- {
110
- "epoch": 1.23,
111
- "learning_rate": 0.0018726599588756144,
112
- "loss": 0.4123,
113
- "step": 160
114
- },
115
- {
116
- "epoch": 1.31,
117
- "learning_rate": 0.001845190085543795,
118
- "loss": 0.5495,
119
- "step": 170
120
- },
121
- {
122
- "epoch": 1.38,
123
- "learning_rate": 0.0018152833103995444,
124
- "loss": 0.4613,
125
- "step": 180
126
- },
127
- {
128
- "epoch": 1.46,
129
- "learning_rate": 0.0017830258624176223,
130
- "loss": 0.4326,
131
- "step": 190
132
- },
133
- {
134
- "epoch": 1.54,
135
- "learning_rate": 0.001748510748171101,
136
- "loss": 0.3239,
137
- "step": 200
138
- },
139
- {
140
- "epoch": 1.62,
141
- "learning_rate": 0.0017118374836693406,
142
- "loss": 0.5905,
143
- "step": 210
144
- },
145
- {
146
- "epoch": 1.69,
147
- "learning_rate": 0.0016731118074275702,
148
- "loss": 0.5519,
149
- "step": 220
150
- },
151
- {
152
- "epoch": 1.77,
153
- "learning_rate": 0.0016324453755953772,
154
- "loss": 0.2935,
155
- "step": 230
156
- },
157
- {
158
- "epoch": 1.85,
159
- "learning_rate": 0.001589955440023123,
160
- "loss": 0.3501,
161
- "step": 240
162
- },
163
- {
164
- "epoch": 1.92,
165
- "learning_rate": 0.0015457645101945045,
166
- "loss": 0.4842,
167
- "step": 250
168
- },
169
- {
170
- "epoch": 2.0,
171
- "learning_rate": 0.0015,
172
- "loss": 0.3596,
173
- "step": 260
174
- },
175
- {
176
- "epoch": 2.0,
177
  "eval_accuracy": 0.9022556390977443,
178
- "eval_loss": 0.22159069776535034,
179
- "eval_runtime": 0.9658,
180
- "eval_samples_per_second": 137.712,
181
- "eval_steps_per_second": 17.602,
182
- "step": 260
183
- },
184
- {
185
- "epoch": 2.08,
186
- "learning_rate": 0.0014527938603696375,
187
- "loss": 0.578,
188
- "step": 270
189
- },
190
- {
191
- "epoch": 2.15,
192
- "learning_rate": 0.0014042821988243049,
193
- "loss": 0.3116,
194
- "step": 280
195
- },
196
- {
197
- "epoch": 2.23,
198
- "learning_rate": 0.0013546048870425357,
199
- "loss": 0.3378,
200
- "step": 290
201
- },
202
- {
203
- "epoch": 2.31,
204
- "learning_rate": 0.0013039051575742468,
205
- "loss": 0.3679,
206
- "step": 300
207
- },
208
- {
209
- "epoch": 2.38,
210
- "learning_rate": 0.0012523291908642217,
211
- "loss": 0.38,
212
- "step": 310
213
- },
214
- {
215
- "epoch": 2.46,
216
- "learning_rate": 0.0012000256937760446,
217
- "loss": 0.3309,
218
- "step": 320
219
- },
220
- {
221
- "epoch": 2.54,
222
- "learning_rate": 0.001147145470831716,
223
- "loss": 0.2248,
224
- "step": 330
225
- },
226
- {
227
- "epoch": 2.62,
228
- "learning_rate": 0.0010938409894031794,
229
- "loss": 0.3261,
230
- "step": 340
231
- },
232
- {
233
- "epoch": 2.69,
234
- "learning_rate": 0.0010402659401094153,
235
- "loss": 0.2697,
236
- "step": 350
237
- },
238
- {
239
- "epoch": 2.77,
240
- "learning_rate": 0.0009865747936866028,
241
- "loss": 0.3062,
242
- "step": 360
243
- },
244
- {
245
- "epoch": 2.85,
246
- "learning_rate": 0.0009329223556089975,
247
- "loss": 0.3551,
248
- "step": 370
249
- },
250
- {
251
- "epoch": 2.92,
252
- "learning_rate": 0.0008794633197446771,
253
- "loss": 0.2783,
254
- "step": 380
255
- },
256
- {
257
- "epoch": 3.0,
258
- "learning_rate": 0.0008263518223330697,
259
- "loss": 0.2419,
260
- "step": 390
261
- },
262
- {
263
- "epoch": 3.0,
264
- "eval_accuracy": 0.9473684210526315,
265
- "eval_loss": 0.13237899541854858,
266
- "eval_runtime": 0.9783,
267
- "eval_samples_per_second": 135.946,
268
- "eval_steps_per_second": 17.377,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 3.08,
273
- "learning_rate": 0.0007737409975702781,
274
- "loss": 0.2691,
275
- "step": 400
276
- },
277
- {
278
- "epoch": 3.15,
279
- "learning_rate": 0.0007217825360835474,
280
- "loss": 0.3115,
281
- "step": 410
282
- },
283
- {
284
- "epoch": 3.23,
285
- "learning_rate": 0.0006706262475679205,
286
- "loss": 0.2924,
287
- "step": 420
288
- },
289
- {
290
- "epoch": 3.31,
291
- "learning_rate": 0.0006204196288461038,
292
- "loss": 0.2037,
293
- "step": 430
294
- },
295
- {
296
- "epoch": 3.38,
297
- "learning_rate": 0.0005713074385969457,
298
- "loss": 0.2782,
299
- "step": 440
300
- },
301
- {
302
- "epoch": 3.46,
303
- "learning_rate": 0.0005234312799786921,
304
- "loss": 0.1141,
305
- "step": 450
306
- },
307
- {
308
- "epoch": 3.54,
309
- "learning_rate": 0.00047692919235042255,
310
- "loss": 0.504,
311
- "step": 460
312
- },
313
- {
314
- "epoch": 3.62,
315
- "learning_rate": 0.0004319352532688443,
316
- "loss": 0.1391,
317
- "step": 470
318
- },
319
- {
320
- "epoch": 3.69,
321
- "learning_rate": 0.0003885791919079877,
322
- "loss": 0.4455,
323
- "step": 480
324
- },
325
- {
326
- "epoch": 3.77,
327
- "learning_rate": 0.0003469860150164151,
328
- "loss": 0.1956,
329
- "step": 490
330
- },
331
- {
332
- "epoch": 3.85,
333
- "learning_rate": 0.00030727564649040063,
334
- "loss": 0.2292,
335
- "step": 500
336
- },
337
- {
338
- "epoch": 3.92,
339
- "learning_rate": 0.00026956258160229697,
340
- "loss": 0.2151,
341
- "step": 510
342
- },
343
- {
344
- "epoch": 4.0,
345
- "learning_rate": 0.0002339555568810221,
346
- "loss": 0.3248,
347
- "step": 520
348
- },
349
- {
350
- "epoch": 4.0,
351
- "eval_accuracy": 0.9699248120300752,
352
- "eval_loss": 0.11242649704217911,
353
- "eval_runtime": 0.9749,
354
- "eval_samples_per_second": 136.426,
355
- "eval_steps_per_second": 17.438,
356
- "step": 520
357
- },
358
- {
359
- "epoch": 4.08,
360
- "learning_rate": 0.00020055723659649904,
361
- "loss": 0.0799,
362
- "step": 530
363
- },
364
- {
365
- "epoch": 4.15,
366
- "learning_rate": 0.00016946391675198836,
367
- "loss": 0.1854,
368
- "step": 540
369
- },
370
- {
371
- "epoch": 4.23,
372
- "learning_rate": 0.00014076524743778318,
373
- "loss": 0.2312,
374
- "step": 550
375
- },
376
- {
377
- "epoch": 4.31,
378
- "learning_rate": 0.0001145439743467902,
379
- "loss": 0.1739,
380
- "step": 560
381
- },
382
- {
383
- "epoch": 4.38,
384
- "learning_rate": 9.08757001972762e-05,
385
- "loss": 0.1797,
386
- "step": 570
387
- },
388
- {
389
- "epoch": 4.46,
390
- "learning_rate": 6.98286667506618e-05,
391
- "loss": 0.1376,
392
- "step": 580
393
- },
394
- {
395
- "epoch": 4.54,
396
- "learning_rate": 5.146355805285452e-05,
397
- "loss": 0.11,
398
- "step": 590
399
- },
400
- {
401
- "epoch": 4.62,
402
- "learning_rate": 3.5833325466437696e-05,
403
- "loss": 0.1737,
404
- "step": 600
405
- },
406
- {
407
- "epoch": 4.69,
408
- "learning_rate": 2.2983034998182995e-05,
409
- "loss": 0.1018,
410
- "step": 610
411
- },
412
- {
413
- "epoch": 4.77,
414
- "learning_rate": 1.2949737362087154e-05,
415
- "loss": 0.2031,
416
- "step": 620
417
- },
418
- {
419
- "epoch": 4.85,
420
- "learning_rate": 5.7623611525721155e-06,
421
- "loss": 0.1868,
422
- "step": 630
423
- },
424
- {
425
- "epoch": 4.92,
426
- "learning_rate": 1.4416294358582382e-06,
427
- "loss": 0.1781,
428
- "step": 640
429
- },
430
- {
431
- "epoch": 5.0,
432
- "learning_rate": 0.0,
433
- "loss": 0.1557,
434
- "step": 650
435
- },
436
- {
437
- "epoch": 5.0,
438
- "eval_accuracy": 0.9699248120300752,
439
- "eval_loss": 0.10140984505414963,
440
- "eval_runtime": 1.0018,
441
- "eval_samples_per_second": 132.757,
442
- "eval_steps_per_second": 16.969,
443
- "step": 650
444
  },
445
  {
446
- "epoch": 5.0,
447
- "step": 650,
448
- "total_flos": 1.0979660327860224e+17,
449
- "train_loss": 0.3958505061039558,
450
- "train_runtime": 122.678,
451
- "train_samples_per_second": 42.143,
452
- "train_steps_per_second": 5.298
453
  }
454
  ],
455
- "max_steps": 650,
456
- "num_train_epochs": 5,
457
- "total_flos": 1.0979660327860224e+17,
458
  "trial_name": null,
459
  "trial_params": null
460
  }
 
1
  {
2
+ "best_metric": 0.21882569789886475,
3
+ "best_model_checkpoint": "./beans_outputs/checkpoint-130",
4
+ "epoch": 1.0,
5
+ "global_step": 130,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.08,
12
+ "learning_rate": 0.0015384615384615387,
13
+ "loss": 1.0909,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.15,
18
+ "learning_rate": 0.0019823877374156645,
19
+ "loss": 0.9564,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.23,
24
+ "learning_rate": 0.0018976137276390142,
25
+ "loss": 0.8485,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.31,
30
+ "learning_rate": 0.001748510748171101,
31
+ "loss": 0.682,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.38,
36
+ "learning_rate": 0.0015457645101945045,
37
+ "loss": 0.7391,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.46,
42
+ "learning_rate": 0.0013039051575742468,
43
+ "loss": 0.3587,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.54,
48
+ "learning_rate": 0.0010402659401094153,
49
+ "loss": 0.5773,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.62,
54
+ "learning_rate": 0.0007737409975702781,
55
+ "loss": 0.507,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.69,
60
+ "learning_rate": 0.0005234312799786921,
61
+ "loss": 0.4312,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.77,
66
+ "learning_rate": 0.00030727564649040063,
67
+ "loss": 0.4067,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.85,
72
+ "learning_rate": 0.00014076524743778318,
73
+ "loss": 0.3141,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.92,
78
+ "learning_rate": 3.5833325466437696e-05,
79
+ "loss": 0.3359,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 1.0,
84
+ "learning_rate": 0.0,
85
+ "loss": 0.5679,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  "eval_accuracy": 0.9022556390977443,
91
+ "eval_loss": 0.21882569789886475,
92
+ "eval_runtime": 0.8027,
93
+ "eval_samples_per_second": 165.693,
94
+ "eval_steps_per_second": 21.179,
95
+ "step": 130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  },
97
  {
98
+ "epoch": 1.0,
99
+ "step": 130,
100
+ "total_flos": 2.195932065572045e+16,
101
+ "train_loss": 0.6012275494061984,
102
+ "train_runtime": 20.9633,
103
+ "train_samples_per_second": 49.324,
104
+ "train_steps_per_second": 6.201
105
  }
106
  ],
107
+ "max_steps": 130,
108
+ "num_train_epochs": 1,
109
+ "total_flos": 2.195932065572045e+16,
110
  "trial_name": null,
111
  "trial_params": null
112
  }