jayanthspratap committed on
Commit
5df4032
1 Parent(s): 82143c6

End of training

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. all_results.json +13 -0
  3. eval_results.json +8 -0
  4. train_results.json +8 -0
  5. trainer_state.json +377 -0
README.md CHANGED
@@ -30,7 +30,7 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  This model was trained from scratch on the imagefolder dataset.
32
  It achieves the following results on the evaluation set:
33
- - Loss: 0.6466
34
  - Accuracy: 0.6984
35
 
36
  ## Model description
 
30
 
31
  This model was trained from scratch on the imagefolder dataset.
32
  It achieves the following results on the evaluation set:
33
+ - Loss: 0.6463
34
  - Accuracy: 0.6984
35
 
36
  ## Model description
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.6984126984126984,
4
+ "eval_loss": 0.6463457942008972,
5
+ "eval_runtime": 10.8458,
6
+ "eval_samples_per_second": 5.809,
7
+ "eval_steps_per_second": 1.475,
8
+ "total_flos": 4.3240530204942336e+17,
9
+ "train_loss": 0.6562235273633684,
10
+ "train_runtime": 1176.6645,
11
+ "train_samples_per_second": 4.742,
12
+ "train_steps_per_second": 0.297
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.6984126984126984,
4
+ "eval_loss": 0.6463457942008972,
5
+ "eval_runtime": 10.8458,
6
+ "eval_samples_per_second": 5.809,
7
+ "eval_steps_per_second": 1.475
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "total_flos": 4.3240530204942336e+17,
4
+ "train_loss": 0.6562235273633684,
5
+ "train_runtime": 1176.6645,
6
+ "train_samples_per_second": 4.742,
7
+ "train_steps_per_second": 0.297
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6984126984126984,
3
+ "best_model_checkpoint": "vit-base-patch16-224/checkpoint-245",
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 350,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2857142857142857,
13
+ "grad_norm": 6.601813793182373,
14
+ "learning_rate": 1.4285714285714286e-06,
15
+ "loss": 0.785,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.5714285714285714,
20
+ "grad_norm": 5.436784267425537,
21
+ "learning_rate": 2.8571428571428573e-06,
22
+ "loss": 0.7224,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.8571428571428571,
27
+ "grad_norm": 5.551754474639893,
28
+ "learning_rate": 4.2857142857142855e-06,
29
+ "loss": 0.6893,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "eval_accuracy": 0.4126984126984127,
35
+ "eval_loss": 0.7319461703300476,
36
+ "eval_runtime": 12.0068,
37
+ "eval_samples_per_second": 5.247,
38
+ "eval_steps_per_second": 1.333,
39
+ "step": 35
40
+ },
41
+ {
42
+ "epoch": 1.1428571428571428,
43
+ "grad_norm": 7.985645294189453,
44
+ "learning_rate": 4.920634920634921e-06,
45
+ "loss": 0.7348,
46
+ "step": 40
47
+ },
48
+ {
49
+ "epoch": 1.4285714285714286,
50
+ "grad_norm": 4.910062313079834,
51
+ "learning_rate": 4.761904761904762e-06,
52
+ "loss": 0.7049,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 1.7142857142857144,
57
+ "grad_norm": 4.96162223815918,
58
+ "learning_rate": 4.603174603174604e-06,
59
+ "loss": 0.7185,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 2.0,
64
+ "grad_norm": 7.957433223724365,
65
+ "learning_rate": 4.444444444444444e-06,
66
+ "loss": 0.702,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 2.0,
71
+ "eval_accuracy": 0.5238095238095238,
72
+ "eval_loss": 0.686264157295227,
73
+ "eval_runtime": 10.8952,
74
+ "eval_samples_per_second": 5.782,
75
+ "eval_steps_per_second": 1.469,
76
+ "step": 70
77
+ },
78
+ {
79
+ "epoch": 2.2857142857142856,
80
+ "grad_norm": 5.780089378356934,
81
+ "learning_rate": 4.2857142857142855e-06,
82
+ "loss": 0.6473,
83
+ "step": 80
84
+ },
85
+ {
86
+ "epoch": 2.571428571428571,
87
+ "grad_norm": 3.8612518310546875,
88
+ "learning_rate": 4.126984126984127e-06,
89
+ "loss": 0.6843,
90
+ "step": 90
91
+ },
92
+ {
93
+ "epoch": 2.857142857142857,
94
+ "grad_norm": 5.266788482666016,
95
+ "learning_rate": 3.968253968253968e-06,
96
+ "loss": 0.6644,
97
+ "step": 100
98
+ },
99
+ {
100
+ "epoch": 3.0,
101
+ "eval_accuracy": 0.5873015873015873,
102
+ "eval_loss": 0.6796107888221741,
103
+ "eval_runtime": 10.7178,
104
+ "eval_samples_per_second": 5.878,
105
+ "eval_steps_per_second": 1.493,
106
+ "step": 105
107
+ },
108
+ {
109
+ "epoch": 3.142857142857143,
110
+ "grad_norm": 4.334442615509033,
111
+ "learning_rate": 3.80952380952381e-06,
112
+ "loss": 0.6628,
113
+ "step": 110
114
+ },
115
+ {
116
+ "epoch": 3.4285714285714284,
117
+ "grad_norm": 4.810235500335693,
118
+ "learning_rate": 3.6507936507936507e-06,
119
+ "loss": 0.6903,
120
+ "step": 120
121
+ },
122
+ {
123
+ "epoch": 3.7142857142857144,
124
+ "grad_norm": 7.596153736114502,
125
+ "learning_rate": 3.492063492063492e-06,
126
+ "loss": 0.6859,
127
+ "step": 130
128
+ },
129
+ {
130
+ "epoch": 4.0,
131
+ "grad_norm": 8.868383407592773,
132
+ "learning_rate": 3.3333333333333333e-06,
133
+ "loss": 0.645,
134
+ "step": 140
135
+ },
136
+ {
137
+ "epoch": 4.0,
138
+ "eval_accuracy": 0.5714285714285714,
139
+ "eval_loss": 0.6722006797790527,
140
+ "eval_runtime": 10.7432,
141
+ "eval_samples_per_second": 5.864,
142
+ "eval_steps_per_second": 1.489,
143
+ "step": 140
144
+ },
145
+ {
146
+ "epoch": 4.285714285714286,
147
+ "grad_norm": 4.674743175506592,
148
+ "learning_rate": 3.1746031746031746e-06,
149
+ "loss": 0.6336,
150
+ "step": 150
151
+ },
152
+ {
153
+ "epoch": 4.571428571428571,
154
+ "grad_norm": 5.1306023597717285,
155
+ "learning_rate": 3.015873015873016e-06,
156
+ "loss": 0.6607,
157
+ "step": 160
158
+ },
159
+ {
160
+ "epoch": 4.857142857142857,
161
+ "grad_norm": 5.9358015060424805,
162
+ "learning_rate": 2.8571428571428573e-06,
163
+ "loss": 0.6455,
164
+ "step": 170
165
+ },
166
+ {
167
+ "epoch": 5.0,
168
+ "eval_accuracy": 0.6507936507936508,
169
+ "eval_loss": 0.6544514298439026,
170
+ "eval_runtime": 10.8969,
171
+ "eval_samples_per_second": 5.781,
172
+ "eval_steps_per_second": 1.468,
173
+ "step": 175
174
+ },
175
+ {
176
+ "epoch": 5.142857142857143,
177
+ "grad_norm": 4.706002712249756,
178
+ "learning_rate": 2.6984126984126986e-06,
179
+ "loss": 0.6039,
180
+ "step": 180
181
+ },
182
+ {
183
+ "epoch": 5.428571428571429,
184
+ "grad_norm": 5.049504280090332,
185
+ "learning_rate": 2.53968253968254e-06,
186
+ "loss": 0.6881,
187
+ "step": 190
188
+ },
189
+ {
190
+ "epoch": 5.714285714285714,
191
+ "grad_norm": 4.565422534942627,
192
+ "learning_rate": 2.380952380952381e-06,
193
+ "loss": 0.5841,
194
+ "step": 200
195
+ },
196
+ {
197
+ "epoch": 6.0,
198
+ "grad_norm": 5.591720104217529,
199
+ "learning_rate": 2.222222222222222e-06,
200
+ "loss": 0.6456,
201
+ "step": 210
202
+ },
203
+ {
204
+ "epoch": 6.0,
205
+ "eval_accuracy": 0.6507936507936508,
206
+ "eval_loss": 0.6535508036613464,
207
+ "eval_runtime": 10.8606,
208
+ "eval_samples_per_second": 5.801,
209
+ "eval_steps_per_second": 1.473,
210
+ "step": 210
211
+ },
212
+ {
213
+ "epoch": 6.285714285714286,
214
+ "grad_norm": 5.09126091003418,
215
+ "learning_rate": 2.0634920634920634e-06,
216
+ "loss": 0.6349,
217
+ "step": 220
218
+ },
219
+ {
220
+ "epoch": 6.571428571428571,
221
+ "grad_norm": 4.8962812423706055,
222
+ "learning_rate": 1.904761904761905e-06,
223
+ "loss": 0.5735,
224
+ "step": 230
225
+ },
226
+ {
227
+ "epoch": 6.857142857142857,
228
+ "grad_norm": 4.864034175872803,
229
+ "learning_rate": 1.746031746031746e-06,
230
+ "loss": 0.6745,
231
+ "step": 240
232
+ },
233
+ {
234
+ "epoch": 7.0,
235
+ "eval_accuracy": 0.6984126984126984,
236
+ "eval_loss": 0.6463457942008972,
237
+ "eval_runtime": 10.8225,
238
+ "eval_samples_per_second": 5.821,
239
+ "eval_steps_per_second": 1.478,
240
+ "step": 245
241
+ },
242
+ {
243
+ "epoch": 7.142857142857143,
244
+ "grad_norm": 7.017258167266846,
245
+ "learning_rate": 1.5873015873015873e-06,
246
+ "loss": 0.6431,
247
+ "step": 250
248
+ },
249
+ {
250
+ "epoch": 7.428571428571429,
251
+ "grad_norm": 4.79843282699585,
252
+ "learning_rate": 1.4285714285714286e-06,
253
+ "loss": 0.6164,
254
+ "step": 260
255
+ },
256
+ {
257
+ "epoch": 7.714285714285714,
258
+ "grad_norm": 6.843749523162842,
259
+ "learning_rate": 1.26984126984127e-06,
260
+ "loss": 0.644,
261
+ "step": 270
262
+ },
263
+ {
264
+ "epoch": 8.0,
265
+ "grad_norm": 5.495963096618652,
266
+ "learning_rate": 1.111111111111111e-06,
267
+ "loss": 0.6369,
268
+ "step": 280
269
+ },
270
+ {
271
+ "epoch": 8.0,
272
+ "eval_accuracy": 0.6666666666666666,
273
+ "eval_loss": 0.6524515748023987,
274
+ "eval_runtime": 11.0752,
275
+ "eval_samples_per_second": 5.688,
276
+ "eval_steps_per_second": 1.445,
277
+ "step": 280
278
+ },
279
+ {
280
+ "epoch": 8.285714285714286,
281
+ "grad_norm": 7.264893531799316,
282
+ "learning_rate": 9.523809523809525e-07,
283
+ "loss": 0.6661,
284
+ "step": 290
285
+ },
286
+ {
287
+ "epoch": 8.571428571428571,
288
+ "grad_norm": 3.773362636566162,
289
+ "learning_rate": 7.936507936507937e-07,
290
+ "loss": 0.607,
291
+ "step": 300
292
+ },
293
+ {
294
+ "epoch": 8.857142857142858,
295
+ "grad_norm": 6.338254451751709,
296
+ "learning_rate": 6.34920634920635e-07,
297
+ "loss": 0.6012,
298
+ "step": 310
299
+ },
300
+ {
301
+ "epoch": 9.0,
302
+ "eval_accuracy": 0.6984126984126984,
303
+ "eval_loss": 0.6485763788223267,
304
+ "eval_runtime": 10.9179,
305
+ "eval_samples_per_second": 5.77,
306
+ "eval_steps_per_second": 1.465,
307
+ "step": 315
308
+ },
309
+ {
310
+ "epoch": 9.142857142857142,
311
+ "grad_norm": 5.557713031768799,
312
+ "learning_rate": 4.7619047619047623e-07,
313
+ "loss": 0.625,
314
+ "step": 320
315
+ },
316
+ {
317
+ "epoch": 9.428571428571429,
318
+ "grad_norm": 5.070873737335205,
319
+ "learning_rate": 3.174603174603175e-07,
320
+ "loss": 0.6012,
321
+ "step": 330
322
+ },
323
+ {
324
+ "epoch": 9.714285714285714,
325
+ "grad_norm": 6.369267463684082,
326
+ "learning_rate": 1.5873015873015874e-07,
327
+ "loss": 0.6237,
328
+ "step": 340
329
+ },
330
+ {
331
+ "epoch": 10.0,
332
+ "grad_norm": 8.102092742919922,
333
+ "learning_rate": 0.0,
334
+ "loss": 0.6219,
335
+ "step": 350
336
+ },
337
+ {
338
+ "epoch": 10.0,
339
+ "eval_accuracy": 0.6984126984126984,
340
+ "eval_loss": 0.646605372428894,
341
+ "eval_runtime": 11.2095,
342
+ "eval_samples_per_second": 5.62,
343
+ "eval_steps_per_second": 1.427,
344
+ "step": 350
345
+ },
346
+ {
347
+ "epoch": 10.0,
348
+ "step": 350,
349
+ "total_flos": 4.3240530204942336e+17,
350
+ "train_loss": 0.6562235273633684,
351
+ "train_runtime": 1176.6645,
352
+ "train_samples_per_second": 4.742,
353
+ "train_steps_per_second": 0.297
354
+ }
355
+ ],
356
+ "logging_steps": 10,
357
+ "max_steps": 350,
358
+ "num_input_tokens_seen": 0,
359
+ "num_train_epochs": 10,
360
+ "save_steps": 500,
361
+ "stateful_callbacks": {
362
+ "TrainerControl": {
363
+ "args": {
364
+ "should_epoch_stop": false,
365
+ "should_evaluate": false,
366
+ "should_log": false,
367
+ "should_save": true,
368
+ "should_training_stop": true
369
+ },
370
+ "attributes": {}
371
+ }
372
+ },
373
+ "total_flos": 4.3240530204942336e+17,
374
+ "train_batch_size": 4,
375
+ "trial_name": null,
376
+ "trial_params": null
377
+ }