system-admin committed on
Commit
91f27c2
1 Parent(s): 75f2059

Training in progress, epoch 1

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.87,
4
+ "eval_loss": 0.40429016947746277,
5
+ "eval_runtime": 2.5766,
6
+ "eval_samples_per_second": 38.811,
7
+ "eval_steps_per_second": 1.552,
8
+ "total_flos": 1.98847911886848e+17,
9
+ "train_loss": 0.30984322795501124,
10
+ "train_runtime": 343.6308,
11
+ "train_samples_per_second": 23.281,
12
+ "train_steps_per_second": 0.757
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.87,
4
+ "eval_loss": 0.40429016947746277,
5
+ "eval_runtime": 2.5766,
6
+ "eval_samples_per_second": 38.811,
7
+ "eval_steps_per_second": 1.552
8
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64119251aae881b9f732c9567a2f488f49243d8eed4c21d792eb9755430d1172
3
  size 110342832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44379d7a5346d81411cae547be326fc0568a26757c220f8ef6d67d19c560b224
3
  size 110342832
runs/May01_14-50-35_d6eb9620a945/events.out.tfevents.1714575447.d6eb9620a945.304.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad33798289c2a26234c98cdd259dc2ba9cbe5f21ba1ef00e248574084fab4bc
3
+ size 411
runs/May01_15-13-06_d6eb9620a945/events.out.tfevents.1714576415.d6eb9620a945.304.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a742bde1d7176ea4e22ae12fc5179fa6b5c23078b47270dc3b649f5e708293f1
3
+ size 4184
runs/May01_15-13-06_d6eb9620a945/events.out.tfevents.1714576432.d6eb9620a945.304.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12451ffbbd72896acb4139307b7142b44faecb9133619f4ef86f01d18f2c4091
3
+ size 5776
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "total_flos": 1.98847911886848e+17,
4
+ "train_loss": 0.30984322795501124,
5
+ "train_runtime": 343.6308,
6
+ "train_samples_per_second": 23.281,
7
+ "train_steps_per_second": 0.757
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.87,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-student_two_classes/checkpoint-117",
4
+ "epoch": 20.0,
5
+ "eval_steps": 500,
6
+ "global_step": 260,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7692307692307693,
13
+ "grad_norm": 11.056398391723633,
14
+ "learning_rate": 1.923076923076923e-05,
15
+ "loss": 0.6951,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.82,
21
+ "eval_loss": 0.4448259770870209,
22
+ "eval_runtime": 3.0273,
23
+ "eval_samples_per_second": 33.032,
24
+ "eval_steps_per_second": 1.321,
25
+ "step": 13
26
+ },
27
+ {
28
+ "epoch": 1.5384615384615383,
29
+ "grad_norm": 6.355745315551758,
30
+ "learning_rate": 3.846153846153846e-05,
31
+ "loss": 0.4292,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.82,
37
+ "eval_loss": 0.44606465101242065,
38
+ "eval_runtime": 2.4722,
39
+ "eval_samples_per_second": 40.45,
40
+ "eval_steps_per_second": 1.618,
41
+ "step": 26
42
+ },
43
+ {
44
+ "epoch": 2.3076923076923075,
45
+ "grad_norm": 5.6881914138793945,
46
+ "learning_rate": 4.9145299145299147e-05,
47
+ "loss": 0.4246,
48
+ "step": 30
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.82,
53
+ "eval_loss": 0.4553927481174469,
54
+ "eval_runtime": 2.5808,
55
+ "eval_samples_per_second": 38.748,
56
+ "eval_steps_per_second": 1.55,
57
+ "step": 39
58
+ },
59
+ {
60
+ "epoch": 3.076923076923077,
61
+ "grad_norm": 9.89670181274414,
62
+ "learning_rate": 4.700854700854701e-05,
63
+ "loss": 0.4424,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 3.8461538461538463,
68
+ "grad_norm": 6.457494258880615,
69
+ "learning_rate": 4.4871794871794874e-05,
70
+ "loss": 0.3983,
71
+ "step": 50
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "eval_accuracy": 0.83,
76
+ "eval_loss": 0.4219551384449005,
77
+ "eval_runtime": 2.9766,
78
+ "eval_samples_per_second": 33.596,
79
+ "eval_steps_per_second": 1.344,
80
+ "step": 52
81
+ },
82
+ {
83
+ "epoch": 4.615384615384615,
84
+ "grad_norm": 5.4807610511779785,
85
+ "learning_rate": 4.2735042735042735e-05,
86
+ "loss": 0.314,
87
+ "step": 60
88
+ },
89
+ {
90
+ "epoch": 5.0,
91
+ "eval_accuracy": 0.83,
92
+ "eval_loss": 0.44294700026512146,
93
+ "eval_runtime": 2.417,
94
+ "eval_samples_per_second": 41.374,
95
+ "eval_steps_per_second": 1.655,
96
+ "step": 65
97
+ },
98
+ {
99
+ "epoch": 5.384615384615385,
100
+ "grad_norm": 5.0344414710998535,
101
+ "learning_rate": 4.05982905982906e-05,
102
+ "loss": 0.4176,
103
+ "step": 70
104
+ },
105
+ {
106
+ "epoch": 6.0,
107
+ "eval_accuracy": 0.82,
108
+ "eval_loss": 0.4005734324455261,
109
+ "eval_runtime": 3.0995,
110
+ "eval_samples_per_second": 32.264,
111
+ "eval_steps_per_second": 1.291,
112
+ "step": 78
113
+ },
114
+ {
115
+ "epoch": 6.153846153846154,
116
+ "grad_norm": 6.132039546966553,
117
+ "learning_rate": 3.846153846153846e-05,
118
+ "loss": 0.3512,
119
+ "step": 80
120
+ },
121
+ {
122
+ "epoch": 6.923076923076923,
123
+ "grad_norm": 3.423215389251709,
124
+ "learning_rate": 3.6324786324786323e-05,
125
+ "loss": 0.2862,
126
+ "step": 90
127
+ },
128
+ {
129
+ "epoch": 7.0,
130
+ "eval_accuracy": 0.84,
131
+ "eval_loss": 0.4145370423793793,
132
+ "eval_runtime": 2.5336,
133
+ "eval_samples_per_second": 39.47,
134
+ "eval_steps_per_second": 1.579,
135
+ "step": 91
136
+ },
137
+ {
138
+ "epoch": 7.6923076923076925,
139
+ "grad_norm": 4.900909423828125,
140
+ "learning_rate": 3.418803418803419e-05,
141
+ "loss": 0.3072,
142
+ "step": 100
143
+ },
144
+ {
145
+ "epoch": 8.0,
146
+ "eval_accuracy": 0.83,
147
+ "eval_loss": 0.38473400473594666,
148
+ "eval_runtime": 2.5184,
149
+ "eval_samples_per_second": 39.708,
150
+ "eval_steps_per_second": 1.588,
151
+ "step": 104
152
+ },
153
+ {
154
+ "epoch": 8.461538461538462,
155
+ "grad_norm": 6.61647891998291,
156
+ "learning_rate": 3.205128205128206e-05,
157
+ "loss": 0.3001,
158
+ "step": 110
159
+ },
160
+ {
161
+ "epoch": 9.0,
162
+ "eval_accuracy": 0.87,
163
+ "eval_loss": 0.40429016947746277,
164
+ "eval_runtime": 3.3603,
165
+ "eval_samples_per_second": 29.76,
166
+ "eval_steps_per_second": 1.19,
167
+ "step": 117
168
+ },
169
+ {
170
+ "epoch": 9.23076923076923,
171
+ "grad_norm": 9.093049049377441,
172
+ "learning_rate": 2.9914529914529915e-05,
173
+ "loss": 0.3225,
174
+ "step": 120
175
+ },
176
+ {
177
+ "epoch": 10.0,
178
+ "grad_norm": 8.699834823608398,
179
+ "learning_rate": 2.777777777777778e-05,
180
+ "loss": 0.2937,
181
+ "step": 130
182
+ },
183
+ {
184
+ "epoch": 10.0,
185
+ "eval_accuracy": 0.82,
186
+ "eval_loss": 0.40262600779533386,
187
+ "eval_runtime": 2.4958,
188
+ "eval_samples_per_second": 40.067,
189
+ "eval_steps_per_second": 1.603,
190
+ "step": 130
191
+ },
192
+ {
193
+ "epoch": 10.76923076923077,
194
+ "grad_norm": 3.0456953048706055,
195
+ "learning_rate": 2.564102564102564e-05,
196
+ "loss": 0.2206,
197
+ "step": 140
198
+ },
199
+ {
200
+ "epoch": 11.0,
201
+ "eval_accuracy": 0.83,
202
+ "eval_loss": 0.3972433805465698,
203
+ "eval_runtime": 2.6788,
204
+ "eval_samples_per_second": 37.329,
205
+ "eval_steps_per_second": 1.493,
206
+ "step": 143
207
+ },
208
+ {
209
+ "epoch": 11.538461538461538,
210
+ "grad_norm": 6.638078689575195,
211
+ "learning_rate": 2.3504273504273504e-05,
212
+ "loss": 0.2287,
213
+ "step": 150
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "eval_accuracy": 0.86,
218
+ "eval_loss": 0.38398703932762146,
219
+ "eval_runtime": 2.4563,
220
+ "eval_samples_per_second": 40.711,
221
+ "eval_steps_per_second": 1.628,
222
+ "step": 156
223
+ },
224
+ {
225
+ "epoch": 12.307692307692308,
226
+ "grad_norm": 6.832516670227051,
227
+ "learning_rate": 2.1367521367521368e-05,
228
+ "loss": 0.3318,
229
+ "step": 160
230
+ },
231
+ {
232
+ "epoch": 13.0,
233
+ "eval_accuracy": 0.84,
234
+ "eval_loss": 0.3740682899951935,
235
+ "eval_runtime": 2.6452,
236
+ "eval_samples_per_second": 37.805,
237
+ "eval_steps_per_second": 1.512,
238
+ "step": 169
239
+ },
240
+ {
241
+ "epoch": 13.076923076923077,
242
+ "grad_norm": 6.917372226715088,
243
+ "learning_rate": 1.923076923076923e-05,
244
+ "loss": 0.2099,
245
+ "step": 170
246
+ },
247
+ {
248
+ "epoch": 13.846153846153847,
249
+ "grad_norm": 6.500430107116699,
250
+ "learning_rate": 1.7094017094017095e-05,
251
+ "loss": 0.232,
252
+ "step": 180
253
+ },
254
+ {
255
+ "epoch": 14.0,
256
+ "eval_accuracy": 0.85,
257
+ "eval_loss": 0.38503700494766235,
258
+ "eval_runtime": 2.5332,
259
+ "eval_samples_per_second": 39.476,
260
+ "eval_steps_per_second": 1.579,
261
+ "step": 182
262
+ },
263
+ {
264
+ "epoch": 14.615384615384615,
265
+ "grad_norm": 7.342576503753662,
266
+ "learning_rate": 1.4957264957264958e-05,
267
+ "loss": 0.2277,
268
+ "step": 190
269
+ },
270
+ {
271
+ "epoch": 15.0,
272
+ "eval_accuracy": 0.85,
273
+ "eval_loss": 0.3989230692386627,
274
+ "eval_runtime": 2.5166,
275
+ "eval_samples_per_second": 39.737,
276
+ "eval_steps_per_second": 1.589,
277
+ "step": 195
278
+ },
279
+ {
280
+ "epoch": 15.384615384615385,
281
+ "grad_norm": 6.8460235595703125,
282
+ "learning_rate": 1.282051282051282e-05,
283
+ "loss": 0.2253,
284
+ "step": 200
285
+ },
286
+ {
287
+ "epoch": 16.0,
288
+ "eval_accuracy": 0.85,
289
+ "eval_loss": 0.4070873260498047,
290
+ "eval_runtime": 3.0059,
291
+ "eval_samples_per_second": 33.267,
292
+ "eval_steps_per_second": 1.331,
293
+ "step": 208
294
+ },
295
+ {
296
+ "epoch": 16.153846153846153,
297
+ "grad_norm": 3.827336311340332,
298
+ "learning_rate": 1.0683760683760684e-05,
299
+ "loss": 0.2202,
300
+ "step": 210
301
+ },
302
+ {
303
+ "epoch": 16.923076923076923,
304
+ "grad_norm": 6.873669624328613,
305
+ "learning_rate": 8.547008547008548e-06,
306
+ "loss": 0.2463,
307
+ "step": 220
308
+ },
309
+ {
310
+ "epoch": 17.0,
311
+ "eval_accuracy": 0.85,
312
+ "eval_loss": 0.40268489718437195,
313
+ "eval_runtime": 2.5105,
314
+ "eval_samples_per_second": 39.833,
315
+ "eval_steps_per_second": 1.593,
316
+ "step": 221
317
+ },
318
+ {
319
+ "epoch": 17.692307692307693,
320
+ "grad_norm": 8.530336380004883,
321
+ "learning_rate": 6.41025641025641e-06,
322
+ "loss": 0.2496,
323
+ "step": 230
324
+ },
325
+ {
326
+ "epoch": 18.0,
327
+ "eval_accuracy": 0.83,
328
+ "eval_loss": 0.41463416814804077,
329
+ "eval_runtime": 2.5445,
330
+ "eval_samples_per_second": 39.301,
331
+ "eval_steps_per_second": 1.572,
332
+ "step": 234
333
+ },
334
+ {
335
+ "epoch": 18.46153846153846,
336
+ "grad_norm": 3.6479477882385254,
337
+ "learning_rate": 4.273504273504274e-06,
338
+ "loss": 0.1969,
339
+ "step": 240
340
+ },
341
+ {
342
+ "epoch": 19.0,
343
+ "eval_accuracy": 0.83,
344
+ "eval_loss": 0.41043660044670105,
345
+ "eval_runtime": 3.1127,
346
+ "eval_samples_per_second": 32.127,
347
+ "eval_steps_per_second": 1.285,
348
+ "step": 247
349
+ },
350
+ {
351
+ "epoch": 19.23076923076923,
352
+ "grad_norm": 5.254897117614746,
353
+ "learning_rate": 2.136752136752137e-06,
354
+ "loss": 0.257,
355
+ "step": 250
356
+ },
357
+ {
358
+ "epoch": 20.0,
359
+ "grad_norm": 8.73101806640625,
360
+ "learning_rate": 0.0,
361
+ "loss": 0.2279,
362
+ "step": 260
363
+ },
364
+ {
365
+ "epoch": 20.0,
366
+ "eval_accuracy": 0.82,
367
+ "eval_loss": 0.41871729493141174,
368
+ "eval_runtime": 2.516,
369
+ "eval_samples_per_second": 39.746,
370
+ "eval_steps_per_second": 1.59,
371
+ "step": 260
372
+ },
373
+ {
374
+ "epoch": 20.0,
375
+ "step": 260,
376
+ "total_flos": 1.98847911886848e+17,
377
+ "train_loss": 0.30984322795501124,
378
+ "train_runtime": 343.6308,
379
+ "train_samples_per_second": 23.281,
380
+ "train_steps_per_second": 0.757
381
+ }
382
+ ],
383
+ "logging_steps": 10,
384
+ "max_steps": 260,
385
+ "num_input_tokens_seen": 0,
386
+ "num_train_epochs": 20,
387
+ "save_steps": 500,
388
+ "total_flos": 1.98847911886848e+17,
389
+ "train_batch_size": 32,
390
+ "trial_name": null,
391
+ "trial_params": null
392
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ef872a633fabc4ab7dc1a16d9ebd4bac4e621d7c438c463cddf45c8eb1f04e
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e52f74d254afa65d0442792843c29612d3340e5c4e9e5f971ddaf13443f39957
3
  size 5112