Dhika commited on
Commit
b895107
1 Parent(s): 0612a3e
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.7542857142857143,
4
+ "eval_loss": 0.7207300066947937,
5
+ "eval_runtime": 226.5026,
6
+ "eval_samples_per_second": 1.545,
7
+ "eval_steps_per_second": 0.194,
8
+ "test_accuracy": 0.7182320441988951,
9
+ "test_loss": 0.7332790493965149,
10
+ "test_runtime": 164.6516,
11
+ "test_samples_per_second": 1.099,
12
+ "test_steps_per_second": 0.14,
13
+ "total_flos": 9.8805828893184e+17,
14
+ "train_loss": 0.09906705205639203,
15
+ "train_runtime": 36217.9644,
16
+ "train_samples_per_second": 0.352,
17
+ "train_steps_per_second": 0.012
18
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.7542857142857143,
4
+ "eval_loss": 0.7207300066947937,
5
+ "eval_runtime": 226.5026,
6
+ "eval_samples_per_second": 1.545,
7
+ "eval_steps_per_second": 0.194
8
+ }
runs/Jun14_00-20-54_e674c9136a07/events.out.tfevents.1686738783.e674c9136a07.10000.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bff1a512d8d07675c0a067964f6abb306b53bad026626d5d25908cbbf53cf1a
3
+ size 411
test_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_accuracy": 0.7182320441988951,
3
+ "test_loss": 0.7332790493965149,
4
+ "test_runtime": 164.6516,
5
+ "test_samples_per_second": 1.099,
6
+ "test_steps_per_second": 0.14
7
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "total_flos": 9.8805828893184e+17,
4
+ "train_loss": 0.09906705205639203,
5
+ "train_runtime": 36217.9644,
6
+ "train_samples_per_second": 0.352,
7
+ "train_steps_per_second": 0.012
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,700 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7207300066947937,
3
+ "best_model_checkpoint": "/raildefectfft2/checkpoint-30",
4
+ "epoch": 30.0,
5
+ "global_step": 450,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.67,
12
+ "learning_rate": 0.00019555555555555556,
13
+ "loss": 1.3922,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.67,
18
+ "eval_accuracy": 0.6114285714285714,
19
+ "eval_loss": 1.1690133810043335,
20
+ "eval_runtime": 217.3963,
21
+ "eval_samples_per_second": 1.61,
22
+ "eval_steps_per_second": 0.202,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 1.33,
27
+ "learning_rate": 0.00019111111111111114,
28
+ "loss": 0.8518,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 1.33,
33
+ "eval_accuracy": 0.6828571428571428,
34
+ "eval_loss": 0.8874489068984985,
35
+ "eval_runtime": 217.0024,
36
+ "eval_samples_per_second": 1.613,
37
+ "eval_steps_per_second": 0.203,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "learning_rate": 0.0001866666666666667,
43
+ "loss": 0.5386,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 2.0,
48
+ "eval_accuracy": 0.7542857142857143,
49
+ "eval_loss": 0.7207300066947937,
50
+ "eval_runtime": 221.2829,
51
+ "eval_samples_per_second": 1.582,
52
+ "eval_steps_per_second": 0.199,
53
+ "step": 30
54
+ },
55
+ {
56
+ "epoch": 2.67,
57
+ "learning_rate": 0.00018222222222222224,
58
+ "loss": 0.3125,
59
+ "step": 40
60
+ },
61
+ {
62
+ "epoch": 2.67,
63
+ "eval_accuracy": 0.7285714285714285,
64
+ "eval_loss": 0.8382583260536194,
65
+ "eval_runtime": 217.2612,
66
+ "eval_samples_per_second": 1.611,
67
+ "eval_steps_per_second": 0.203,
68
+ "step": 40
69
+ },
70
+ {
71
+ "epoch": 3.33,
72
+ "learning_rate": 0.00017777777777777779,
73
+ "loss": 0.2264,
74
+ "step": 50
75
+ },
76
+ {
77
+ "epoch": 3.33,
78
+ "eval_accuracy": 0.7428571428571429,
79
+ "eval_loss": 0.8440293669700623,
80
+ "eval_runtime": 215.3358,
81
+ "eval_samples_per_second": 1.625,
82
+ "eval_steps_per_second": 0.204,
83
+ "step": 50
84
+ },
85
+ {
86
+ "epoch": 4.0,
87
+ "learning_rate": 0.00017333333333333334,
88
+ "loss": 0.1613,
89
+ "step": 60
90
+ },
91
+ {
92
+ "epoch": 4.0,
93
+ "eval_accuracy": 0.7457142857142857,
94
+ "eval_loss": 0.8516280055046082,
95
+ "eval_runtime": 221.3905,
96
+ "eval_samples_per_second": 1.581,
97
+ "eval_steps_per_second": 0.199,
98
+ "step": 60
99
+ },
100
+ {
101
+ "epoch": 4.67,
102
+ "learning_rate": 0.00016888888888888889,
103
+ "loss": 0.119,
104
+ "step": 70
105
+ },
106
+ {
107
+ "epoch": 4.67,
108
+ "eval_accuracy": 0.6,
109
+ "eval_loss": 1.3625210523605347,
110
+ "eval_runtime": 210.1415,
111
+ "eval_samples_per_second": 1.666,
112
+ "eval_steps_per_second": 0.209,
113
+ "step": 70
114
+ },
115
+ {
116
+ "epoch": 5.33,
117
+ "learning_rate": 0.00016444444444444444,
118
+ "loss": 0.0972,
119
+ "step": 80
120
+ },
121
+ {
122
+ "epoch": 5.33,
123
+ "eval_accuracy": 0.7428571428571429,
124
+ "eval_loss": 0.9109606146812439,
125
+ "eval_runtime": 208.8901,
126
+ "eval_samples_per_second": 1.676,
127
+ "eval_steps_per_second": 0.211,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 6.0,
132
+ "learning_rate": 0.00016,
133
+ "loss": 0.0844,
134
+ "step": 90
135
+ },
136
+ {
137
+ "epoch": 6.0,
138
+ "eval_accuracy": 0.78,
139
+ "eval_loss": 0.8271930813789368,
140
+ "eval_runtime": 214.6994,
141
+ "eval_samples_per_second": 1.63,
142
+ "eval_steps_per_second": 0.205,
143
+ "step": 90
144
+ },
145
+ {
146
+ "epoch": 6.67,
147
+ "learning_rate": 0.00015555555555555556,
148
+ "loss": 0.0725,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 6.67,
153
+ "eval_accuracy": 0.74,
154
+ "eval_loss": 0.8958377242088318,
155
+ "eval_runtime": 209.0619,
156
+ "eval_samples_per_second": 1.674,
157
+ "eval_steps_per_second": 0.21,
158
+ "step": 100
159
+ },
160
+ {
161
+ "epoch": 7.33,
162
+ "learning_rate": 0.0001511111111111111,
163
+ "loss": 0.0708,
164
+ "step": 110
165
+ },
166
+ {
167
+ "epoch": 7.33,
168
+ "eval_accuracy": 0.7371428571428571,
169
+ "eval_loss": 1.0972360372543335,
170
+ "eval_runtime": 210.819,
171
+ "eval_samples_per_second": 1.66,
172
+ "eval_steps_per_second": 0.209,
173
+ "step": 110
174
+ },
175
+ {
176
+ "epoch": 8.0,
177
+ "learning_rate": 0.00014666666666666666,
178
+ "loss": 0.041,
179
+ "step": 120
180
+ },
181
+ {
182
+ "epoch": 8.0,
183
+ "eval_accuracy": 0.7628571428571429,
184
+ "eval_loss": 1.0088900327682495,
185
+ "eval_runtime": 209.2911,
186
+ "eval_samples_per_second": 1.672,
187
+ "eval_steps_per_second": 0.21,
188
+ "step": 120
189
+ },
190
+ {
191
+ "epoch": 8.67,
192
+ "learning_rate": 0.00014222222222222224,
193
+ "loss": 0.0312,
194
+ "step": 130
195
+ },
196
+ {
197
+ "epoch": 8.67,
198
+ "eval_accuracy": 0.7628571428571429,
199
+ "eval_loss": 1.03481125831604,
200
+ "eval_runtime": 215.9471,
201
+ "eval_samples_per_second": 1.621,
202
+ "eval_steps_per_second": 0.204,
203
+ "step": 130
204
+ },
205
+ {
206
+ "epoch": 9.33,
207
+ "learning_rate": 0.0001377777777777778,
208
+ "loss": 0.0401,
209
+ "step": 140
210
+ },
211
+ {
212
+ "epoch": 9.33,
213
+ "eval_accuracy": 0.7257142857142858,
214
+ "eval_loss": 1.2426719665527344,
215
+ "eval_runtime": 211.184,
216
+ "eval_samples_per_second": 1.657,
217
+ "eval_steps_per_second": 0.208,
218
+ "step": 140
219
+ },
220
+ {
221
+ "epoch": 10.0,
222
+ "learning_rate": 0.00013333333333333334,
223
+ "loss": 0.0271,
224
+ "step": 150
225
+ },
226
+ {
227
+ "epoch": 10.0,
228
+ "eval_accuracy": 0.7542857142857143,
229
+ "eval_loss": 1.0153539180755615,
230
+ "eval_runtime": 208.7196,
231
+ "eval_samples_per_second": 1.677,
232
+ "eval_steps_per_second": 0.211,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 10.67,
237
+ "learning_rate": 0.00012888888888888892,
238
+ "loss": 0.0328,
239
+ "step": 160
240
+ },
241
+ {
242
+ "epoch": 10.67,
243
+ "eval_accuracy": 0.7714285714285715,
244
+ "eval_loss": 1.0373210906982422,
245
+ "eval_runtime": 216.803,
246
+ "eval_samples_per_second": 1.614,
247
+ "eval_steps_per_second": 0.203,
248
+ "step": 160
249
+ },
250
+ {
251
+ "epoch": 11.33,
252
+ "learning_rate": 0.00012444444444444444,
253
+ "loss": 0.023,
254
+ "step": 170
255
+ },
256
+ {
257
+ "epoch": 11.33,
258
+ "eval_accuracy": 0.7685714285714286,
259
+ "eval_loss": 1.005110502243042,
260
+ "eval_runtime": 209.183,
261
+ "eval_samples_per_second": 1.673,
262
+ "eval_steps_per_second": 0.21,
263
+ "step": 170
264
+ },
265
+ {
266
+ "epoch": 12.0,
267
+ "learning_rate": 0.00012,
268
+ "loss": 0.0199,
269
+ "step": 180
270
+ },
271
+ {
272
+ "epoch": 12.0,
273
+ "eval_accuracy": 0.7657142857142857,
274
+ "eval_loss": 0.9775477647781372,
275
+ "eval_runtime": 208.6639,
276
+ "eval_samples_per_second": 1.677,
277
+ "eval_steps_per_second": 0.211,
278
+ "step": 180
279
+ },
280
+ {
281
+ "epoch": 12.67,
282
+ "learning_rate": 0.00011555555555555555,
283
+ "loss": 0.0189,
284
+ "step": 190
285
+ },
286
+ {
287
+ "epoch": 12.67,
288
+ "eval_accuracy": 0.7657142857142857,
289
+ "eval_loss": 1.008815050125122,
290
+ "eval_runtime": 216.3653,
291
+ "eval_samples_per_second": 1.618,
292
+ "eval_steps_per_second": 0.203,
293
+ "step": 190
294
+ },
295
+ {
296
+ "epoch": 13.33,
297
+ "learning_rate": 0.00011111111111111112,
298
+ "loss": 0.0188,
299
+ "step": 200
300
+ },
301
+ {
302
+ "epoch": 13.33,
303
+ "eval_accuracy": 0.7342857142857143,
304
+ "eval_loss": 1.1904319524765015,
305
+ "eval_runtime": 209.8186,
306
+ "eval_samples_per_second": 1.668,
307
+ "eval_steps_per_second": 0.21,
308
+ "step": 200
309
+ },
310
+ {
311
+ "epoch": 14.0,
312
+ "learning_rate": 0.00010666666666666667,
313
+ "loss": 0.0167,
314
+ "step": 210
315
+ },
316
+ {
317
+ "epoch": 14.0,
318
+ "eval_accuracy": 0.7285714285714285,
319
+ "eval_loss": 1.2999135255813599,
320
+ "eval_runtime": 209.7587,
321
+ "eval_samples_per_second": 1.669,
322
+ "eval_steps_per_second": 0.21,
323
+ "step": 210
324
+ },
325
+ {
326
+ "epoch": 14.67,
327
+ "learning_rate": 0.00010222222222222222,
328
+ "loss": 0.0159,
329
+ "step": 220
330
+ },
331
+ {
332
+ "epoch": 14.67,
333
+ "eval_accuracy": 0.7514285714285714,
334
+ "eval_loss": 1.1326370239257812,
335
+ "eval_runtime": 215.8574,
336
+ "eval_samples_per_second": 1.621,
337
+ "eval_steps_per_second": 0.204,
338
+ "step": 220
339
+ },
340
+ {
341
+ "epoch": 15.33,
342
+ "learning_rate": 9.777777777777778e-05,
343
+ "loss": 0.0145,
344
+ "step": 230
345
+ },
346
+ {
347
+ "epoch": 15.33,
348
+ "eval_accuracy": 0.7542857142857143,
349
+ "eval_loss": 1.1385791301727295,
350
+ "eval_runtime": 209.3052,
351
+ "eval_samples_per_second": 1.672,
352
+ "eval_steps_per_second": 0.21,
353
+ "step": 230
354
+ },
355
+ {
356
+ "epoch": 16.0,
357
+ "learning_rate": 9.333333333333334e-05,
358
+ "loss": 0.015,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 16.0,
363
+ "eval_accuracy": 0.7542857142857143,
364
+ "eval_loss": 1.1441489458084106,
365
+ "eval_runtime": 209.1403,
366
+ "eval_samples_per_second": 1.674,
367
+ "eval_steps_per_second": 0.21,
368
+ "step": 240
369
+ },
370
+ {
371
+ "epoch": 16.67,
372
+ "learning_rate": 8.888888888888889e-05,
373
+ "loss": 0.0133,
374
+ "step": 250
375
+ },
376
+ {
377
+ "epoch": 16.67,
378
+ "eval_accuracy": 0.7514285714285714,
379
+ "eval_loss": 1.154445767402649,
380
+ "eval_runtime": 212.9184,
381
+ "eval_samples_per_second": 1.644,
382
+ "eval_steps_per_second": 0.207,
383
+ "step": 250
384
+ },
385
+ {
386
+ "epoch": 17.33,
387
+ "learning_rate": 8.444444444444444e-05,
388
+ "loss": 0.0132,
389
+ "step": 260
390
+ },
391
+ {
392
+ "epoch": 17.33,
393
+ "eval_accuracy": 0.7514285714285714,
394
+ "eval_loss": 1.1629431247711182,
395
+ "eval_runtime": 209.529,
396
+ "eval_samples_per_second": 1.67,
397
+ "eval_steps_per_second": 0.21,
398
+ "step": 260
399
+ },
400
+ {
401
+ "epoch": 18.0,
402
+ "learning_rate": 8e-05,
403
+ "loss": 0.0121,
404
+ "step": 270
405
+ },
406
+ {
407
+ "epoch": 18.0,
408
+ "eval_accuracy": 0.7514285714285714,
409
+ "eval_loss": 1.1707779169082642,
410
+ "eval_runtime": 209.7822,
411
+ "eval_samples_per_second": 1.668,
412
+ "eval_steps_per_second": 0.21,
413
+ "step": 270
414
+ },
415
+ {
416
+ "epoch": 18.67,
417
+ "learning_rate": 7.555555555555556e-05,
418
+ "loss": 0.0121,
419
+ "step": 280
420
+ },
421
+ {
422
+ "epoch": 18.67,
423
+ "eval_accuracy": 0.7514285714285714,
424
+ "eval_loss": 1.1773394346237183,
425
+ "eval_runtime": 220.9849,
426
+ "eval_samples_per_second": 1.584,
427
+ "eval_steps_per_second": 0.199,
428
+ "step": 280
429
+ },
430
+ {
431
+ "epoch": 19.33,
432
+ "learning_rate": 7.111111111111112e-05,
433
+ "loss": 0.0114,
434
+ "step": 290
435
+ },
436
+ {
437
+ "epoch": 19.33,
438
+ "eval_accuracy": 0.7514285714285714,
439
+ "eval_loss": 1.1831494569778442,
440
+ "eval_runtime": 216.1426,
441
+ "eval_samples_per_second": 1.619,
442
+ "eval_steps_per_second": 0.204,
443
+ "step": 290
444
+ },
445
+ {
446
+ "epoch": 20.0,
447
+ "learning_rate": 6.666666666666667e-05,
448
+ "loss": 0.0111,
449
+ "step": 300
450
+ },
451
+ {
452
+ "epoch": 20.0,
453
+ "eval_accuracy": 0.7514285714285714,
454
+ "eval_loss": 1.1883198022842407,
455
+ "eval_runtime": 210.1269,
456
+ "eval_samples_per_second": 1.666,
457
+ "eval_steps_per_second": 0.209,
458
+ "step": 300
459
+ },
460
+ {
461
+ "epoch": 20.67,
462
+ "learning_rate": 6.222222222222222e-05,
463
+ "loss": 0.011,
464
+ "step": 310
465
+ },
466
+ {
467
+ "epoch": 20.67,
468
+ "eval_accuracy": 0.7514285714285714,
469
+ "eval_loss": 1.1936721801757812,
470
+ "eval_runtime": 210.5377,
471
+ "eval_samples_per_second": 1.662,
472
+ "eval_steps_per_second": 0.209,
473
+ "step": 310
474
+ },
475
+ {
476
+ "epoch": 21.33,
477
+ "learning_rate": 5.7777777777777776e-05,
478
+ "loss": 0.0103,
479
+ "step": 320
480
+ },
481
+ {
482
+ "epoch": 21.33,
483
+ "eval_accuracy": 0.7514285714285714,
484
+ "eval_loss": 1.1992676258087158,
485
+ "eval_runtime": 214.4415,
486
+ "eval_samples_per_second": 1.632,
487
+ "eval_steps_per_second": 0.205,
488
+ "step": 320
489
+ },
490
+ {
491
+ "epoch": 22.0,
492
+ "learning_rate": 5.333333333333333e-05,
493
+ "loss": 0.0103,
494
+ "step": 330
495
+ },
496
+ {
497
+ "epoch": 22.0,
498
+ "eval_accuracy": 0.7514285714285714,
499
+ "eval_loss": 1.2045563459396362,
500
+ "eval_runtime": 211.851,
501
+ "eval_samples_per_second": 1.652,
502
+ "eval_steps_per_second": 0.208,
503
+ "step": 330
504
+ },
505
+ {
506
+ "epoch": 22.67,
507
+ "learning_rate": 4.888888888888889e-05,
508
+ "loss": 0.0103,
509
+ "step": 340
510
+ },
511
+ {
512
+ "epoch": 22.67,
513
+ "eval_accuracy": 0.7514285714285714,
514
+ "eval_loss": 1.208925724029541,
515
+ "eval_runtime": 209.6112,
516
+ "eval_samples_per_second": 1.67,
517
+ "eval_steps_per_second": 0.21,
518
+ "step": 340
519
+ },
520
+ {
521
+ "epoch": 23.33,
522
+ "learning_rate": 4.4444444444444447e-05,
523
+ "loss": 0.0096,
524
+ "step": 350
525
+ },
526
+ {
527
+ "epoch": 23.33,
528
+ "eval_accuracy": 0.7514285714285714,
529
+ "eval_loss": 1.2132576704025269,
530
+ "eval_runtime": 219.2216,
531
+ "eval_samples_per_second": 1.597,
532
+ "eval_steps_per_second": 0.201,
533
+ "step": 350
534
+ },
535
+ {
536
+ "epoch": 24.0,
537
+ "learning_rate": 4e-05,
538
+ "loss": 0.0095,
539
+ "step": 360
540
+ },
541
+ {
542
+ "epoch": 24.0,
543
+ "eval_accuracy": 0.7514285714285714,
544
+ "eval_loss": 1.2171136140823364,
545
+ "eval_runtime": 208.9362,
546
+ "eval_samples_per_second": 1.675,
547
+ "eval_steps_per_second": 0.211,
548
+ "step": 360
549
+ },
550
+ {
551
+ "epoch": 24.67,
552
+ "learning_rate": 3.555555555555556e-05,
553
+ "loss": 0.0096,
554
+ "step": 370
555
+ },
556
+ {
557
+ "epoch": 24.67,
558
+ "eval_accuracy": 0.7514285714285714,
559
+ "eval_loss": 1.2204023599624634,
560
+ "eval_runtime": 209.9857,
561
+ "eval_samples_per_second": 1.667,
562
+ "eval_steps_per_second": 0.21,
563
+ "step": 370
564
+ },
565
+ {
566
+ "epoch": 25.33,
567
+ "learning_rate": 3.111111111111111e-05,
568
+ "loss": 0.0093,
569
+ "step": 380
570
+ },
571
+ {
572
+ "epoch": 25.33,
573
+ "eval_accuracy": 0.7485714285714286,
574
+ "eval_loss": 1.2234961986541748,
575
+ "eval_runtime": 216.2311,
576
+ "eval_samples_per_second": 1.619,
577
+ "eval_steps_per_second": 0.203,
578
+ "step": 380
579
+ },
580
+ {
581
+ "epoch": 26.0,
582
+ "learning_rate": 2.6666666666666667e-05,
583
+ "loss": 0.0091,
584
+ "step": 390
585
+ },
586
+ {
587
+ "epoch": 26.0,
588
+ "eval_accuracy": 0.7485714285714286,
589
+ "eval_loss": 1.2261521816253662,
590
+ "eval_runtime": 210.1553,
591
+ "eval_samples_per_second": 1.665,
592
+ "eval_steps_per_second": 0.209,
593
+ "step": 390
594
+ },
595
+ {
596
+ "epoch": 26.67,
597
+ "learning_rate": 2.2222222222222223e-05,
598
+ "loss": 0.0092,
599
+ "step": 400
600
+ },
601
+ {
602
+ "epoch": 26.67,
603
+ "eval_accuracy": 0.7514285714285714,
604
+ "eval_loss": 1.227960228919983,
605
+ "eval_runtime": 211.3289,
606
+ "eval_samples_per_second": 1.656,
607
+ "eval_steps_per_second": 0.208,
608
+ "step": 400
609
+ },
610
+ {
611
+ "epoch": 27.33,
612
+ "learning_rate": 1.777777777777778e-05,
613
+ "loss": 0.0089,
614
+ "step": 410
615
+ },
616
+ {
617
+ "epoch": 27.33,
618
+ "eval_accuracy": 0.7514285714285714,
619
+ "eval_loss": 1.2296239137649536,
620
+ "eval_runtime": 213.6977,
621
+ "eval_samples_per_second": 1.638,
622
+ "eval_steps_per_second": 0.206,
623
+ "step": 410
624
+ },
625
+ {
626
+ "epoch": 28.0,
627
+ "learning_rate": 1.3333333333333333e-05,
628
+ "loss": 0.0092,
629
+ "step": 420
630
+ },
631
+ {
632
+ "epoch": 28.0,
633
+ "eval_accuracy": 0.7514285714285714,
634
+ "eval_loss": 1.2309640645980835,
635
+ "eval_runtime": 210.9587,
636
+ "eval_samples_per_second": 1.659,
637
+ "eval_steps_per_second": 0.209,
638
+ "step": 420
639
+ },
640
+ {
641
+ "epoch": 28.67,
642
+ "learning_rate": 8.88888888888889e-06,
643
+ "loss": 0.0089,
644
+ "step": 430
645
+ },
646
+ {
647
+ "epoch": 28.67,
648
+ "eval_accuracy": 0.7485714285714286,
649
+ "eval_loss": 1.2319449186325073,
650
+ "eval_runtime": 210.158,
651
+ "eval_samples_per_second": 1.665,
652
+ "eval_steps_per_second": 0.209,
653
+ "step": 430
654
+ },
655
+ {
656
+ "epoch": 29.33,
657
+ "learning_rate": 4.444444444444445e-06,
658
+ "loss": 0.0089,
659
+ "step": 440
660
+ },
661
+ {
662
+ "epoch": 29.33,
663
+ "eval_accuracy": 0.7485714285714286,
664
+ "eval_loss": 1.23252272605896,
665
+ "eval_runtime": 216.1534,
666
+ "eval_samples_per_second": 1.619,
667
+ "eval_steps_per_second": 0.204,
668
+ "step": 440
669
+ },
670
+ {
671
+ "epoch": 30.0,
672
+ "learning_rate": 0.0,
673
+ "loss": 0.0088,
674
+ "step": 450
675
+ },
676
+ {
677
+ "epoch": 30.0,
678
+ "eval_accuracy": 0.7485714285714286,
679
+ "eval_loss": 1.2327271699905396,
680
+ "eval_runtime": 216.0649,
681
+ "eval_samples_per_second": 1.62,
682
+ "eval_steps_per_second": 0.204,
683
+ "step": 450
684
+ },
685
+ {
686
+ "epoch": 30.0,
687
+ "step": 450,
688
+ "total_flos": 9.8805828893184e+17,
689
+ "train_loss": 0.09906705205639203,
690
+ "train_runtime": 36217.9644,
691
+ "train_samples_per_second": 0.352,
692
+ "train_steps_per_second": 0.012
693
+ }
694
+ ],
695
+ "max_steps": 450,
696
+ "num_train_epochs": 30,
697
+ "total_flos": 9.8805828893184e+17,
698
+ "trial_name": null,
699
+ "trial_params": null
700
+ }