rshrott commited on
Commit
455f9b9
1 Parent(s): 2fe4aa0

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.6831683168316832,
4
- "eval_loss": 0.8943801522254944,
5
- "eval_runtime": 4.6419,
6
- "eval_samples_per_second": 21.758,
7
- "eval_steps_per_second": 2.801,
8
- "total_flos": 5.6352679914450125e+17,
9
- "train_loss": 0.3986694132745789,
10
- "train_runtime": 506.3898,
11
- "train_samples_per_second": 14.36,
12
- "train_steps_per_second": 0.9
13
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.6772727272727272,
4
+ "eval_loss": 0.7634429931640625,
5
+ "eval_runtime": 7.4362,
6
+ "eval_samples_per_second": 29.585,
7
+ "eval_steps_per_second": 3.765,
8
+ "total_flos": 6.137420584742093e+17,
9
+ "train_loss": 0.4632607804190728,
10
+ "train_runtime": 749.353,
11
+ "train_samples_per_second": 10.569,
12
+ "train_steps_per_second": 0.662
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.6831683168316832,
4
- "eval_loss": 0.8943801522254944,
5
- "eval_runtime": 4.6419,
6
- "eval_samples_per_second": 21.758,
7
- "eval_steps_per_second": 2.801
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "eval_accuracy": 0.6772727272727272,
4
+ "eval_loss": 0.7634429931640625,
5
+ "eval_runtime": 7.4362,
6
+ "eval_samples_per_second": 29.585,
7
+ "eval_steps_per_second": 3.765
8
  }
runs/Aug01_01-04-29_eb351023ca0f/events.out.tfevents.1690852784.eb351023ca0f.2251.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06d631550141fb84d75a37862311124ab83d7c4f4675c1cf95625ac14e0ad7c3
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 5.6352679914450125e+17,
4
- "train_loss": 0.3986694132745789,
5
- "train_runtime": 506.3898,
6
- "train_samples_per_second": 14.36,
7
- "train_steps_per_second": 0.9
8
  }
 
1
  {
2
+ "epoch": 4.0,
3
+ "total_flos": 6.137420584742093e+17,
4
+ "train_loss": 0.4632607804190728,
5
+ "train_runtime": 749.353,
6
+ "train_samples_per_second": 10.569,
7
+ "train_steps_per_second": 0.662
8
  }
trainer_state.json CHANGED
@@ -1,331 +1,490 @@
1
  {
2
- "best_metric": 0.8943801522254944,
3
- "best_model_checkpoint": "./vit-base-renovation/checkpoint-200",
4
- "epoch": 8.0,
5
- "global_step": 456,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.18,
12
- "learning_rate": 0.0001956140350877193,
13
- "loss": 0.9993,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.35,
18
- "learning_rate": 0.0001912280701754386,
19
- "loss": 0.9628,
20
  "step": 20
21
  },
22
  {
23
- "epoch": 0.53,
24
- "learning_rate": 0.00018684210526315792,
25
- "loss": 1.058,
 
 
 
 
 
 
 
 
 
26
  "step": 30
27
  },
28
  {
29
- "epoch": 0.7,
30
- "learning_rate": 0.0001824561403508772,
31
- "loss": 0.9951,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 0.88,
36
- "learning_rate": 0.0001780701754385965,
37
- "loss": 1.0107,
38
  "step": 50
39
  },
40
  {
41
- "epoch": 1.05,
42
- "learning_rate": 0.0001736842105263158,
43
- "loss": 1.0481,
 
 
 
 
 
 
 
 
 
44
  "step": 60
45
  },
46
  {
47
- "epoch": 1.23,
48
- "learning_rate": 0.0001692982456140351,
49
- "loss": 0.9934,
50
  "step": 70
51
  },
52
  {
53
- "epoch": 1.4,
54
- "learning_rate": 0.0001649122807017544,
55
- "loss": 0.9848,
 
 
 
 
 
 
 
 
 
56
  "step": 80
57
  },
58
  {
59
- "epoch": 1.58,
60
- "learning_rate": 0.0001605263157894737,
61
- "loss": 0.8844,
62
  "step": 90
63
  },
64
  {
65
- "epoch": 1.75,
66
- "learning_rate": 0.00015614035087719297,
67
- "loss": 0.8483,
68
  "step": 100
69
  },
70
  {
71
- "epoch": 1.75,
72
- "eval_accuracy": 0.5445544554455446,
73
- "eval_loss": 0.9965408444404602,
74
- "eval_runtime": 4.6358,
75
- "eval_samples_per_second": 21.787,
76
- "eval_steps_per_second": 2.804,
77
  "step": 100
78
  },
79
  {
80
- "epoch": 1.93,
81
- "learning_rate": 0.0001517543859649123,
82
- "loss": 0.9321,
83
  "step": 110
84
  },
85
  {
86
- "epoch": 2.11,
87
- "learning_rate": 0.00014736842105263158,
88
- "loss": 0.7607,
89
  "step": 120
90
  },
91
  {
92
- "epoch": 2.28,
93
- "learning_rate": 0.00014298245614035088,
94
- "loss": 0.6632,
 
 
 
 
 
 
 
 
 
95
  "step": 130
96
  },
97
  {
98
- "epoch": 2.46,
99
- "learning_rate": 0.00013859649122807018,
100
- "loss": 0.7359,
101
  "step": 140
102
  },
103
  {
104
- "epoch": 2.63,
105
- "learning_rate": 0.00013421052631578948,
106
- "loss": 0.7393,
107
  "step": 150
108
  },
109
  {
110
- "epoch": 2.81,
111
- "learning_rate": 0.0001298245614035088,
112
- "loss": 0.6817,
 
 
 
 
 
 
 
 
 
113
  "step": 160
114
  },
115
  {
116
- "epoch": 2.98,
117
- "learning_rate": 0.00012543859649122806,
118
- "loss": 0.6913,
119
  "step": 170
120
  },
121
  {
122
- "epoch": 3.16,
123
- "learning_rate": 0.00012149122807017544,
124
- "loss": 0.5017,
 
 
 
 
 
 
 
 
 
125
  "step": 180
126
  },
127
  {
128
- "epoch": 3.33,
129
- "learning_rate": 0.00011710526315789474,
130
- "loss": 0.3448,
131
  "step": 190
132
  },
133
  {
134
- "epoch": 3.51,
135
- "learning_rate": 0.00011271929824561404,
136
- "loss": 0.3474,
137
  "step": 200
138
  },
139
  {
140
- "epoch": 3.51,
141
- "eval_accuracy": 0.6831683168316832,
142
- "eval_loss": 0.8943801522254944,
143
- "eval_runtime": 4.5642,
144
- "eval_samples_per_second": 22.129,
145
- "eval_steps_per_second": 2.848,
146
  "step": 200
147
  },
148
  {
149
- "epoch": 3.68,
150
- "learning_rate": 0.00010833333333333333,
151
- "loss": 0.3598,
152
  "step": 210
153
  },
154
  {
155
- "epoch": 3.86,
156
- "learning_rate": 0.00010394736842105264,
157
- "loss": 0.3214,
158
  "step": 220
159
  },
160
  {
161
- "epoch": 4.04,
162
- "learning_rate": 9.956140350877193e-05,
163
- "loss": 0.2831,
 
 
 
 
 
 
 
 
 
164
  "step": 230
165
  },
166
  {
167
- "epoch": 4.21,
168
- "learning_rate": 9.517543859649123e-05,
169
- "loss": 0.1586,
170
  "step": 240
171
  },
172
  {
173
- "epoch": 4.39,
174
- "learning_rate": 9.078947368421054e-05,
175
- "loss": 0.0929,
176
  "step": 250
177
  },
178
  {
179
- "epoch": 4.56,
180
- "learning_rate": 8.640350877192982e-05,
181
- "loss": 0.1087,
 
 
 
 
 
 
 
 
 
182
  "step": 260
183
  },
184
  {
185
- "epoch": 4.74,
186
- "learning_rate": 8.201754385964913e-05,
187
- "loss": 0.1019,
188
  "step": 270
189
  },
190
  {
191
- "epoch": 4.91,
192
- "learning_rate": 7.763157894736843e-05,
193
- "loss": 0.0528,
 
 
 
 
 
 
 
 
 
194
  "step": 280
195
  },
196
  {
197
- "epoch": 5.09,
198
- "learning_rate": 7.324561403508772e-05,
199
- "loss": 0.1051,
200
  "step": 290
201
  },
202
  {
203
- "epoch": 5.26,
204
- "learning_rate": 6.885964912280702e-05,
205
- "loss": 0.0328,
206
  "step": 300
207
  },
208
  {
209
- "epoch": 5.26,
210
- "eval_accuracy": 0.6633663366336634,
211
- "eval_loss": 1.158254861831665,
212
- "eval_runtime": 4.4598,
213
- "eval_samples_per_second": 22.647,
214
- "eval_steps_per_second": 2.915,
215
  "step": 300
216
  },
217
  {
218
- "epoch": 5.44,
219
- "learning_rate": 6.447368421052632e-05,
220
- "loss": 0.0686,
221
  "step": 310
222
  },
223
  {
224
- "epoch": 5.61,
225
- "learning_rate": 6.0087719298245616e-05,
226
- "loss": 0.0358,
227
  "step": 320
228
  },
229
  {
230
- "epoch": 5.79,
231
- "learning_rate": 5.570175438596491e-05,
232
- "loss": 0.0255,
 
 
 
 
 
 
 
 
 
233
  "step": 330
234
  },
235
  {
236
- "epoch": 5.96,
237
- "learning_rate": 5.131578947368422e-05,
238
- "loss": 0.0401,
239
  "step": 340
240
  },
241
  {
242
- "epoch": 6.14,
243
- "learning_rate": 4.6929824561403515e-05,
244
- "loss": 0.0217,
 
 
 
 
 
 
 
 
 
245
  "step": 350
246
  },
247
  {
248
- "epoch": 6.32,
249
- "learning_rate": 4.254385964912281e-05,
250
- "loss": 0.0208,
251
  "step": 360
252
  },
253
  {
254
- "epoch": 6.49,
255
- "learning_rate": 3.815789473684211e-05,
256
- "loss": 0.0203,
257
  "step": 370
258
  },
259
  {
260
- "epoch": 6.67,
261
- "learning_rate": 3.377192982456141e-05,
262
- "loss": 0.0196,
 
 
 
 
 
 
 
 
 
263
  "step": 380
264
  },
265
  {
266
- "epoch": 6.84,
267
- "learning_rate": 2.9385964912280706e-05,
268
- "loss": 0.0176,
269
  "step": 390
270
  },
271
  {
272
- "epoch": 7.02,
273
- "learning_rate": 2.5e-05,
274
- "loss": 0.0176,
275
  "step": 400
276
  },
277
  {
278
- "epoch": 7.02,
279
- "eval_accuracy": 0.6831683168316832,
280
- "eval_loss": 1.0844857692718506,
281
- "eval_runtime": 4.6632,
282
- "eval_samples_per_second": 21.659,
283
- "eval_steps_per_second": 2.788,
284
  "step": 400
285
  },
286
  {
287
- "epoch": 7.19,
288
- "learning_rate": 2.06140350877193e-05,
289
- "loss": 0.0168,
290
  "step": 410
291
  },
292
  {
293
- "epoch": 7.37,
294
- "learning_rate": 1.62280701754386e-05,
295
- "loss": 0.0165,
296
  "step": 420
297
  },
298
  {
299
- "epoch": 7.54,
300
- "learning_rate": 1.1842105263157895e-05,
301
- "loss": 0.0161,
 
 
 
 
 
 
 
 
 
302
  "step": 430
303
  },
304
  {
305
- "epoch": 7.72,
306
- "learning_rate": 7.456140350877193e-06,
307
- "loss": 0.0163,
308
  "step": 440
309
  },
310
  {
311
- "epoch": 7.89,
312
- "learning_rate": 3.070175438596491e-06,
313
- "loss": 0.0162,
 
 
 
 
 
 
 
 
 
314
  "step": 450
315
  },
316
  {
317
- "epoch": 8.0,
318
- "step": 456,
319
- "total_flos": 5.6352679914450125e+17,
320
- "train_loss": 0.3986694132745789,
321
- "train_runtime": 506.3898,
322
- "train_samples_per_second": 14.36,
323
- "train_steps_per_second": 0.9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  }
325
  ],
326
- "max_steps": 456,
327
- "num_train_epochs": 8,
328
- "total_flos": 5.6352679914450125e+17,
329
  "trial_name": null,
330
  "trial_params": null
331
  }
 
1
  {
2
+ "best_metric": 0.7634429931640625,
3
+ "best_model_checkpoint": "./vit-base-renovation/checkpoint-175",
4
+ "epoch": 4.0,
5
+ "global_step": 496,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.08,
12
+ "learning_rate": 0.00019596774193548388,
13
+ "loss": 0.9826,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.16,
18
+ "learning_rate": 0.00019193548387096775,
19
+ "loss": 0.9741,
20
  "step": 20
21
  },
22
  {
23
+ "epoch": 0.2,
24
+ "eval_accuracy": 0.4818181818181818,
25
+ "eval_loss": 0.9574973583221436,
26
+ "eval_runtime": 7.5527,
27
+ "eval_samples_per_second": 29.129,
28
+ "eval_steps_per_second": 3.707,
29
+ "step": 25
30
+ },
31
+ {
32
+ "epoch": 0.24,
33
+ "learning_rate": 0.00018790322580645164,
34
+ "loss": 1.0046,
35
  "step": 30
36
  },
37
  {
38
+ "epoch": 0.32,
39
+ "learning_rate": 0.00018387096774193548,
40
+ "loss": 0.9326,
41
  "step": 40
42
  },
43
  {
44
+ "epoch": 0.4,
45
+ "learning_rate": 0.00017983870967741935,
46
+ "loss": 0.9827,
47
  "step": 50
48
  },
49
  {
50
+ "epoch": 0.4,
51
+ "eval_accuracy": 0.5181818181818182,
52
+ "eval_loss": 0.9343744516372681,
53
+ "eval_runtime": 7.6495,
54
+ "eval_samples_per_second": 28.76,
55
+ "eval_steps_per_second": 3.66,
56
+ "step": 50
57
+ },
58
+ {
59
+ "epoch": 0.48,
60
+ "learning_rate": 0.00017580645161290325,
61
+ "loss": 0.91,
62
  "step": 60
63
  },
64
  {
65
+ "epoch": 0.56,
66
+ "learning_rate": 0.00017177419354838711,
67
+ "loss": 0.8578,
68
  "step": 70
69
  },
70
  {
71
+ "epoch": 0.6,
72
+ "eval_accuracy": 0.6181818181818182,
73
+ "eval_loss": 0.8342534899711609,
74
+ "eval_runtime": 7.5132,
75
+ "eval_samples_per_second": 29.282,
76
+ "eval_steps_per_second": 3.727,
77
+ "step": 75
78
+ },
79
+ {
80
+ "epoch": 0.65,
81
+ "learning_rate": 0.00016774193548387098,
82
+ "loss": 0.9354,
83
  "step": 80
84
  },
85
  {
86
+ "epoch": 0.73,
87
+ "learning_rate": 0.00016370967741935485,
88
+ "loss": 0.8993,
89
  "step": 90
90
  },
91
  {
92
+ "epoch": 0.81,
93
+ "learning_rate": 0.00015967741935483872,
94
+ "loss": 0.9373,
95
  "step": 100
96
  },
97
  {
98
+ "epoch": 0.81,
99
+ "eval_accuracy": 0.5909090909090909,
100
+ "eval_loss": 0.8896223306655884,
101
+ "eval_runtime": 8.4017,
102
+ "eval_samples_per_second": 26.185,
103
+ "eval_steps_per_second": 3.333,
104
  "step": 100
105
  },
106
  {
107
+ "epoch": 0.89,
108
+ "learning_rate": 0.0001556451612903226,
109
+ "loss": 0.8373,
110
  "step": 110
111
  },
112
  {
113
+ "epoch": 0.97,
114
+ "learning_rate": 0.00015161290322580646,
115
+ "loss": 0.7462,
116
  "step": 120
117
  },
118
  {
119
+ "epoch": 1.01,
120
+ "eval_accuracy": 0.6363636363636364,
121
+ "eval_loss": 0.7968847155570984,
122
+ "eval_runtime": 7.5692,
123
+ "eval_samples_per_second": 29.065,
124
+ "eval_steps_per_second": 3.699,
125
+ "step": 125
126
+ },
127
+ {
128
+ "epoch": 1.05,
129
+ "learning_rate": 0.00014758064516129032,
130
+ "loss": 0.8443,
131
  "step": 130
132
  },
133
  {
134
+ "epoch": 1.13,
135
+ "learning_rate": 0.00014354838709677422,
136
+ "loss": 0.5794,
137
  "step": 140
138
  },
139
  {
140
+ "epoch": 1.21,
141
+ "learning_rate": 0.0001395161290322581,
142
+ "loss": 0.6953,
143
  "step": 150
144
  },
145
  {
146
+ "epoch": 1.21,
147
+ "eval_accuracy": 0.6363636363636364,
148
+ "eval_loss": 0.8157215714454651,
149
+ "eval_runtime": 7.5224,
150
+ "eval_samples_per_second": 29.246,
151
+ "eval_steps_per_second": 3.722,
152
+ "step": 150
153
+ },
154
+ {
155
+ "epoch": 1.29,
156
+ "learning_rate": 0.00013548387096774193,
157
+ "loss": 0.6258,
158
  "step": 160
159
  },
160
  {
161
+ "epoch": 1.37,
162
+ "learning_rate": 0.0001314516129032258,
163
+ "loss": 0.5461,
164
  "step": 170
165
  },
166
  {
167
+ "epoch": 1.41,
168
+ "eval_accuracy": 0.6772727272727272,
169
+ "eval_loss": 0.7634429931640625,
170
+ "eval_runtime": 7.4134,
171
+ "eval_samples_per_second": 29.676,
172
+ "eval_steps_per_second": 3.777,
173
+ "step": 175
174
+ },
175
+ {
176
+ "epoch": 1.45,
177
+ "learning_rate": 0.0001274193548387097,
178
+ "loss": 0.6182,
179
  "step": 180
180
  },
181
  {
182
+ "epoch": 1.53,
183
+ "learning_rate": 0.00012338709677419356,
184
+ "loss": 0.6352,
185
  "step": 190
186
  },
187
  {
188
+ "epoch": 1.61,
189
+ "learning_rate": 0.00011935483870967743,
190
+ "loss": 0.6445,
191
  "step": 200
192
  },
193
  {
194
+ "epoch": 1.61,
195
+ "eval_accuracy": 0.6545454545454545,
196
+ "eval_loss": 0.7743316888809204,
197
+ "eval_runtime": 7.6503,
198
+ "eval_samples_per_second": 28.757,
199
+ "eval_steps_per_second": 3.66,
200
  "step": 200
201
  },
202
  {
203
+ "epoch": 1.69,
204
+ "learning_rate": 0.00011532258064516131,
205
+ "loss": 0.5757,
206
  "step": 210
207
  },
208
  {
209
+ "epoch": 1.77,
210
+ "learning_rate": 0.00011129032258064515,
211
+ "loss": 0.5437,
212
  "step": 220
213
  },
214
  {
215
+ "epoch": 1.81,
216
+ "eval_accuracy": 0.65,
217
+ "eval_loss": 0.7716971635818481,
218
+ "eval_runtime": 7.5883,
219
+ "eval_samples_per_second": 28.992,
220
+ "eval_steps_per_second": 3.69,
221
+ "step": 225
222
+ },
223
+ {
224
+ "epoch": 1.85,
225
+ "learning_rate": 0.00010725806451612903,
226
+ "loss": 0.4878,
227
  "step": 230
228
  },
229
  {
230
+ "epoch": 1.94,
231
+ "learning_rate": 0.0001032258064516129,
232
+ "loss": 0.5527,
233
  "step": 240
234
  },
235
  {
236
+ "epoch": 2.02,
237
+ "learning_rate": 9.919354838709678e-05,
238
+ "loss": 0.5911,
239
  "step": 250
240
  },
241
  {
242
+ "epoch": 2.02,
243
+ "eval_accuracy": 0.6363636363636364,
244
+ "eval_loss": 0.8338578939437866,
245
+ "eval_runtime": 8.4178,
246
+ "eval_samples_per_second": 26.135,
247
+ "eval_steps_per_second": 3.326,
248
+ "step": 250
249
+ },
250
+ {
251
+ "epoch": 2.1,
252
+ "learning_rate": 9.516129032258065e-05,
253
+ "loss": 0.3191,
254
  "step": 260
255
  },
256
  {
257
+ "epoch": 2.18,
258
+ "learning_rate": 9.112903225806452e-05,
259
+ "loss": 0.2483,
260
  "step": 270
261
  },
262
  {
263
+ "epoch": 2.22,
264
+ "eval_accuracy": 0.6318181818181818,
265
+ "eval_loss": 0.8596317768096924,
266
+ "eval_runtime": 7.3913,
267
+ "eval_samples_per_second": 29.765,
268
+ "eval_steps_per_second": 3.788,
269
+ "step": 275
270
+ },
271
+ {
272
+ "epoch": 2.26,
273
+ "learning_rate": 8.709677419354839e-05,
274
+ "loss": 0.3026,
275
  "step": 280
276
  },
277
  {
278
+ "epoch": 2.34,
279
+ "learning_rate": 8.306451612903227e-05,
280
+ "loss": 0.229,
281
  "step": 290
282
  },
283
  {
284
+ "epoch": 2.42,
285
+ "learning_rate": 7.903225806451613e-05,
286
+ "loss": 0.378,
287
  "step": 300
288
  },
289
  {
290
+ "epoch": 2.42,
291
+ "eval_accuracy": 0.6181818181818182,
292
+ "eval_loss": 0.9896994829177856,
293
+ "eval_runtime": 7.5826,
294
+ "eval_samples_per_second": 29.014,
295
+ "eval_steps_per_second": 3.693,
296
  "step": 300
297
  },
298
  {
299
+ "epoch": 2.5,
300
+ "learning_rate": 7.500000000000001e-05,
301
+ "loss": 0.2432,
302
  "step": 310
303
  },
304
  {
305
+ "epoch": 2.58,
306
+ "learning_rate": 7.096774193548388e-05,
307
+ "loss": 0.2742,
308
  "step": 320
309
  },
310
  {
311
+ "epoch": 2.62,
312
+ "eval_accuracy": 0.6909090909090909,
313
+ "eval_loss": 0.8965399861335754,
314
+ "eval_runtime": 7.3219,
315
+ "eval_samples_per_second": 30.047,
316
+ "eval_steps_per_second": 3.824,
317
+ "step": 325
318
+ },
319
+ {
320
+ "epoch": 2.66,
321
+ "learning_rate": 6.693548387096774e-05,
322
+ "loss": 0.1942,
323
  "step": 330
324
  },
325
  {
326
+ "epoch": 2.74,
327
+ "learning_rate": 6.290322580645161e-05,
328
+ "loss": 0.179,
329
  "step": 340
330
  },
331
  {
332
+ "epoch": 2.82,
333
+ "learning_rate": 5.887096774193549e-05,
334
+ "loss": 0.1898,
335
+ "step": 350
336
+ },
337
+ {
338
+ "epoch": 2.82,
339
+ "eval_accuracy": 0.6681818181818182,
340
+ "eval_loss": 1.026204228401184,
341
+ "eval_runtime": 7.7003,
342
+ "eval_samples_per_second": 28.57,
343
+ "eval_steps_per_second": 3.636,
344
  "step": 350
345
  },
346
  {
347
+ "epoch": 2.9,
348
+ "learning_rate": 5.4838709677419355e-05,
349
+ "loss": 0.3122,
350
  "step": 360
351
  },
352
  {
353
+ "epoch": 2.98,
354
+ "learning_rate": 5.080645161290323e-05,
355
+ "loss": 0.2116,
356
  "step": 370
357
  },
358
  {
359
+ "epoch": 3.02,
360
+ "eval_accuracy": 0.6409090909090909,
361
+ "eval_loss": 1.1058470010757446,
362
+ "eval_runtime": 7.4881,
363
+ "eval_samples_per_second": 29.38,
364
+ "eval_steps_per_second": 3.739,
365
+ "step": 375
366
+ },
367
+ {
368
+ "epoch": 3.06,
369
+ "learning_rate": 4.67741935483871e-05,
370
+ "loss": 0.2225,
371
  "step": 380
372
  },
373
  {
374
+ "epoch": 3.15,
375
+ "learning_rate": 4.2741935483870973e-05,
376
+ "loss": 0.076,
377
  "step": 390
378
  },
379
  {
380
+ "epoch": 3.23,
381
+ "learning_rate": 3.870967741935484e-05,
382
+ "loss": 0.0702,
383
  "step": 400
384
  },
385
  {
386
+ "epoch": 3.23,
387
+ "eval_accuracy": 0.6545454545454545,
388
+ "eval_loss": 1.0472766160964966,
389
+ "eval_runtime": 7.6075,
390
+ "eval_samples_per_second": 28.919,
391
+ "eval_steps_per_second": 3.681,
392
  "step": 400
393
  },
394
  {
395
+ "epoch": 3.31,
396
+ "learning_rate": 3.467741935483872e-05,
397
+ "loss": 0.0637,
398
  "step": 410
399
  },
400
  {
401
+ "epoch": 3.39,
402
+ "learning_rate": 3.0645161290322585e-05,
403
+ "loss": 0.0566,
404
  "step": 420
405
  },
406
  {
407
+ "epoch": 3.43,
408
+ "eval_accuracy": 0.6681818181818182,
409
+ "eval_loss": 1.0961891412734985,
410
+ "eval_runtime": 8.2248,
411
+ "eval_samples_per_second": 26.748,
412
+ "eval_steps_per_second": 3.404,
413
+ "step": 425
414
+ },
415
+ {
416
+ "epoch": 3.47,
417
+ "learning_rate": 2.661290322580645e-05,
418
+ "loss": 0.0421,
419
  "step": 430
420
  },
421
  {
422
+ "epoch": 3.55,
423
+ "learning_rate": 2.258064516129032e-05,
424
+ "loss": 0.1011,
425
  "step": 440
426
  },
427
  {
428
+ "epoch": 3.63,
429
+ "learning_rate": 1.8951612903225807e-05,
430
+ "loss": 0.0775,
431
+ "step": 450
432
+ },
433
+ {
434
+ "epoch": 3.63,
435
+ "eval_accuracy": 0.65,
436
+ "eval_loss": 1.1501661539077759,
437
+ "eval_runtime": 7.3146,
438
+ "eval_samples_per_second": 30.077,
439
+ "eval_steps_per_second": 3.828,
440
  "step": 450
441
  },
442
  {
443
+ "epoch": 3.71,
444
+ "learning_rate": 1.4919354838709679e-05,
445
+ "loss": 0.0519,
446
+ "step": 460
447
+ },
448
+ {
449
+ "epoch": 3.79,
450
+ "learning_rate": 1.0887096774193549e-05,
451
+ "loss": 0.0485,
452
+ "step": 470
453
+ },
454
+ {
455
+ "epoch": 3.83,
456
+ "eval_accuracy": 0.6454545454545455,
457
+ "eval_loss": 1.1838457584381104,
458
+ "eval_runtime": 7.191,
459
+ "eval_samples_per_second": 30.594,
460
+ "eval_steps_per_second": 3.894,
461
+ "step": 475
462
+ },
463
+ {
464
+ "epoch": 3.87,
465
+ "learning_rate": 6.854838709677419e-06,
466
+ "loss": 0.0686,
467
+ "step": 480
468
+ },
469
+ {
470
+ "epoch": 3.95,
471
+ "learning_rate": 2.82258064516129e-06,
472
+ "loss": 0.0616,
473
+ "step": 490
474
+ },
475
+ {
476
+ "epoch": 4.0,
477
+ "step": 496,
478
+ "total_flos": 6.137420584742093e+17,
479
+ "train_loss": 0.4632607804190728,
480
+ "train_runtime": 749.353,
481
+ "train_samples_per_second": 10.569,
482
+ "train_steps_per_second": 0.662
483
  }
484
  ],
485
+ "max_steps": 496,
486
+ "num_train_epochs": 4,
487
+ "total_flos": 6.137420584742093e+17,
488
  "trial_name": null,
489
  "trial_params": null
490
  }