younggi commited on
Commit
61c8c46
1 Parent(s): 6a67d94

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. test_results.json +5 -5
  3. trainer_state.json +284 -104
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.5419354838709678,
4
- "eval_loss": 1.474304437637329,
5
- "eval_runtime": 12.5121,
6
- "eval_samples_per_second": 12.388,
7
- "eval_steps_per_second": 3.117
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.6387096774193548,
4
+ "eval_loss": 1.15707266330719,
5
+ "eval_runtime": 11.086,
6
+ "eval_samples_per_second": 13.982,
7
+ "eval_steps_per_second": 7.036
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.5419354838709678,
4
- "eval_loss": 1.474304437637329,
5
- "eval_runtime": 12.5121,
6
- "eval_samples_per_second": 12.388,
7
- "eval_steps_per_second": 3.117
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.6387096774193548,
4
+ "eval_loss": 1.15707266330719,
5
+ "eval_runtime": 11.086,
6
+ "eval_samples_per_second": 13.982,
7
+ "eval_steps_per_second": 7.036
8
  }
trainer_state.json CHANGED
@@ -1,257 +1,437 @@
1
  {
2
- "best_metric": 0.37142857142857144,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-300",
4
  "epoch": 3.25,
5
- "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 1.6666666666666667e-05,
13
- "loss": 2.3855,
14
- "step": 10
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.07,
18
  "learning_rate": 3.3333333333333335e-05,
19
- "loss": 2.2966,
20
- "step": 20
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 0.1,
24
  "learning_rate": 5e-05,
25
- "loss": 2.3017,
26
- "step": 30
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 0.13,
30
  "learning_rate": 4.814814814814815e-05,
31
- "loss": 2.3663,
32
- "step": 40
 
 
 
 
 
 
33
  },
34
  {
35
  "epoch": 0.17,
36
  "learning_rate": 4.62962962962963e-05,
37
- "loss": 2.3098,
38
- "step": 50
 
 
 
 
 
 
39
  },
40
  {
41
  "epoch": 0.2,
42
  "learning_rate": 4.4444444444444447e-05,
43
- "loss": 2.2747,
44
- "step": 60
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 0.23,
48
  "learning_rate": 4.259259259259259e-05,
49
- "loss": 2.3247,
50
- "step": 70
 
 
 
 
 
 
51
  },
52
  {
53
  "epoch": 0.25,
54
  "eval_accuracy": 0.21428571428571427,
55
- "eval_loss": 2.210233211517334,
56
- "eval_runtime": 5.7285,
57
- "eval_samples_per_second": 12.22,
58
- "eval_steps_per_second": 3.142,
59
- "step": 75
60
  },
61
  {
62
  "epoch": 1.02,
63
  "learning_rate": 4.074074074074074e-05,
64
- "loss": 2.2727,
65
- "step": 80
 
 
 
 
 
 
66
  },
67
  {
68
  "epoch": 1.05,
69
  "learning_rate": 3.888888888888889e-05,
70
- "loss": 2.1971,
71
- "step": 90
 
 
 
 
 
 
72
  },
73
  {
74
  "epoch": 1.08,
75
  "learning_rate": 3.7037037037037037e-05,
76
- "loss": 2.2692,
77
- "step": 100
 
 
 
 
 
 
78
  },
79
  {
80
  "epoch": 1.12,
81
  "learning_rate": 3.518518518518519e-05,
82
- "loss": 2.223,
83
- "step": 110
 
 
 
 
 
 
84
  },
85
  {
86
  "epoch": 1.15,
87
  "learning_rate": 3.3333333333333335e-05,
88
- "loss": 2.0977,
89
- "step": 120
 
 
 
 
 
 
90
  },
91
  {
92
  "epoch": 1.18,
93
  "learning_rate": 3.148148148148148e-05,
94
- "loss": 2.0419,
95
- "step": 130
 
 
 
 
 
 
96
  },
97
  {
98
  "epoch": 1.22,
99
  "learning_rate": 2.962962962962963e-05,
100
- "loss": 1.9612,
101
- "step": 140
 
 
 
 
 
 
102
  },
103
  {
104
  "epoch": 1.25,
105
  "learning_rate": 2.777777777777778e-05,
106
- "loss": 2.2528,
107
- "step": 150
108
  },
109
  {
110
  "epoch": 1.25,
111
- "eval_accuracy": 0.15714285714285714,
112
- "eval_loss": 2.047905206680298,
113
- "eval_runtime": 5.4882,
114
- "eval_samples_per_second": 12.755,
115
- "eval_steps_per_second": 3.28,
116
- "step": 150
 
 
 
 
 
 
117
  },
118
  {
119
  "epoch": 2.03,
120
  "learning_rate": 2.5925925925925925e-05,
121
- "loss": 2.1757,
122
- "step": 160
 
 
 
 
 
 
123
  },
124
  {
125
  "epoch": 2.07,
126
  "learning_rate": 2.4074074074074074e-05,
127
- "loss": 1.9829,
128
- "step": 170
 
 
 
 
 
 
129
  },
130
  {
131
  "epoch": 2.1,
132
  "learning_rate": 2.2222222222222223e-05,
133
- "loss": 2.0055,
134
- "step": 180
 
 
 
 
 
 
135
  },
136
  {
137
  "epoch": 2.13,
138
  "learning_rate": 2.037037037037037e-05,
139
- "loss": 1.8768,
140
- "step": 190
 
 
 
 
 
 
141
  },
142
  {
143
  "epoch": 2.17,
144
  "learning_rate": 1.8518518518518518e-05,
145
- "loss": 1.7034,
146
- "step": 200
 
 
 
 
 
 
147
  },
148
  {
149
  "epoch": 2.2,
150
  "learning_rate": 1.6666666666666667e-05,
151
- "loss": 1.7559,
152
- "step": 210
 
 
 
 
 
 
153
  },
154
  {
155
  "epoch": 2.23,
156
  "learning_rate": 1.4814814814814815e-05,
157
- "loss": 1.8267,
158
- "step": 220
159
  },
160
  {
161
  "epoch": 2.25,
162
- "eval_accuracy": 0.32857142857142857,
163
- "eval_loss": 1.7604564428329468,
164
- "eval_runtime": 5.5106,
165
- "eval_samples_per_second": 12.703,
166
- "eval_steps_per_second": 3.266,
167
- "step": 225
 
 
 
 
 
 
168
  },
169
  {
170
  "epoch": 3.02,
171
  "learning_rate": 1.2962962962962962e-05,
172
- "loss": 1.737,
173
- "step": 230
 
 
 
 
 
 
174
  },
175
  {
176
  "epoch": 3.05,
177
  "learning_rate": 1.1111111111111112e-05,
178
- "loss": 1.7787,
179
- "step": 240
 
 
 
 
 
 
180
  },
181
  {
182
  "epoch": 3.08,
183
  "learning_rate": 9.259259259259259e-06,
184
- "loss": 1.8136,
185
- "step": 250
 
 
 
 
 
 
186
  },
187
  {
188
  "epoch": 3.12,
189
  "learning_rate": 7.4074074074074075e-06,
190
- "loss": 1.6954,
191
- "step": 260
 
 
 
 
 
 
192
  },
193
  {
194
  "epoch": 3.15,
195
  "learning_rate": 5.555555555555556e-06,
196
- "loss": 1.6381,
197
- "step": 270
 
 
 
 
 
 
198
  },
199
  {
200
  "epoch": 3.18,
201
  "learning_rate": 3.7037037037037037e-06,
202
- "loss": 1.6797,
203
- "step": 280
 
 
 
 
 
 
204
  },
205
  {
206
  "epoch": 3.22,
207
  "learning_rate": 1.8518518518518519e-06,
208
- "loss": 1.4995,
209
- "step": 290
 
 
 
 
 
 
210
  },
211
  {
212
  "epoch": 3.25,
213
  "learning_rate": 0.0,
214
- "loss": 1.5876,
215
- "step": 300
216
  },
217
  {
218
  "epoch": 3.25,
219
- "eval_accuracy": 0.37142857142857144,
220
- "eval_loss": 1.6022236347198486,
221
- "eval_runtime": 5.771,
222
- "eval_samples_per_second": 12.13,
223
- "eval_steps_per_second": 3.119,
224
- "step": 300
225
  },
226
  {
227
  "epoch": 3.25,
228
- "step": 300,
229
  "total_flos": 1.495384188125184e+18,
230
- "train_loss": 2.0110448233286538,
231
- "train_runtime": 226.017,
232
- "train_samples_per_second": 5.309,
233
- "train_steps_per_second": 1.327
234
  },
235
  {
236
  "epoch": 3.25,
237
- "eval_accuracy": 0.5419354838709678,
238
- "eval_loss": 1.4743043184280396,
239
- "eval_runtime": 13.0002,
240
- "eval_samples_per_second": 11.923,
241
- "eval_steps_per_second": 3.0,
242
- "step": 300
243
  },
244
  {
245
  "epoch": 3.25,
246
- "eval_accuracy": 0.5419354838709678,
247
- "eval_loss": 1.474304437637329,
248
- "eval_runtime": 12.5121,
249
- "eval_samples_per_second": 12.388,
250
- "eval_steps_per_second": 3.117,
251
- "step": 300
252
  }
253
  ],
254
- "max_steps": 300,
255
  "num_train_epochs": 9223372036854775807,
256
  "total_flos": 1.495384188125184e+18,
257
  "trial_name": null,
 
1
  {
2
+ "best_metric": 0.45714285714285713,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-600",
4
  "epoch": 3.25,
5
+ "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 8.333333333333334e-06,
13
+ "loss": 2.3667,
14
+ "step": 10
15
+ },
16
  {
17
  "epoch": 0.03,
18
  "learning_rate": 1.6666666666666667e-05,
19
+ "loss": 2.3348,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.05,
24
+ "learning_rate": 2.5e-05,
25
+ "loss": 2.3995,
26
+ "step": 30
27
  },
28
  {
29
  "epoch": 0.07,
30
  "learning_rate": 3.3333333333333335e-05,
31
+ "loss": 2.313,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.08,
36
+ "learning_rate": 4.166666666666667e-05,
37
+ "loss": 2.3326,
38
+ "step": 50
39
  },
40
  {
41
  "epoch": 0.1,
42
  "learning_rate": 5e-05,
43
+ "loss": 2.2494,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "learning_rate": 4.9074074074074075e-05,
49
+ "loss": 2.388,
50
+ "step": 70
51
  },
52
  {
53
  "epoch": 0.13,
54
  "learning_rate": 4.814814814814815e-05,
55
+ "loss": 2.3325,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.15,
60
+ "learning_rate": 4.722222222222222e-05,
61
+ "loss": 2.534,
62
+ "step": 90
63
  },
64
  {
65
  "epoch": 0.17,
66
  "learning_rate": 4.62962962962963e-05,
67
+ "loss": 2.1932,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.18,
72
+ "learning_rate": 4.5370370370370374e-05,
73
+ "loss": 2.4753,
74
+ "step": 110
75
  },
76
  {
77
  "epoch": 0.2,
78
  "learning_rate": 4.4444444444444447e-05,
79
+ "loss": 2.2637,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.22,
84
+ "learning_rate": 4.351851851851852e-05,
85
+ "loss": 2.3741,
86
+ "step": 130
87
  },
88
  {
89
  "epoch": 0.23,
90
  "learning_rate": 4.259259259259259e-05,
91
+ "loss": 2.3961,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.25,
96
+ "learning_rate": 4.166666666666667e-05,
97
+ "loss": 2.2528,
98
+ "step": 150
99
  },
100
  {
101
  "epoch": 0.25,
102
  "eval_accuracy": 0.21428571428571427,
103
+ "eval_loss": 2.237157106399536,
104
+ "eval_runtime": 5.1747,
105
+ "eval_samples_per_second": 13.527,
106
+ "eval_steps_per_second": 6.764,
107
+ "step": 150
108
  },
109
  {
110
  "epoch": 1.02,
111
  "learning_rate": 4.074074074074074e-05,
112
+ "loss": 2.3843,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 1.03,
117
+ "learning_rate": 3.981481481481482e-05,
118
+ "loss": 2.3227,
119
+ "step": 170
120
  },
121
  {
122
  "epoch": 1.05,
123
  "learning_rate": 3.888888888888889e-05,
124
+ "loss": 2.2261,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 1.07,
129
+ "learning_rate": 3.7962962962962964e-05,
130
+ "loss": 2.2536,
131
+ "step": 190
132
  },
133
  {
134
  "epoch": 1.08,
135
  "learning_rate": 3.7037037037037037e-05,
136
+ "loss": 2.2081,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 1.1,
141
+ "learning_rate": 3.611111111111111e-05,
142
+ "loss": 2.3562,
143
+ "step": 210
144
  },
145
  {
146
  "epoch": 1.12,
147
  "learning_rate": 3.518518518518519e-05,
148
+ "loss": 2.1536,
149
+ "step": 220
150
+ },
151
+ {
152
+ "epoch": 1.13,
153
+ "learning_rate": 3.425925925925926e-05,
154
+ "loss": 2.2,
155
+ "step": 230
156
  },
157
  {
158
  "epoch": 1.15,
159
  "learning_rate": 3.3333333333333335e-05,
160
+ "loss": 2.3511,
161
+ "step": 240
162
+ },
163
+ {
164
+ "epoch": 1.17,
165
+ "learning_rate": 3.240740740740741e-05,
166
+ "loss": 2.0288,
167
+ "step": 250
168
  },
169
  {
170
  "epoch": 1.18,
171
  "learning_rate": 3.148148148148148e-05,
172
+ "loss": 2.2663,
173
+ "step": 260
174
+ },
175
+ {
176
+ "epoch": 1.2,
177
+ "learning_rate": 3.055555555555556e-05,
178
+ "loss": 2.0513,
179
+ "step": 270
180
  },
181
  {
182
  "epoch": 1.22,
183
  "learning_rate": 2.962962962962963e-05,
184
+ "loss": 2.1505,
185
+ "step": 280
186
+ },
187
+ {
188
+ "epoch": 1.23,
189
+ "learning_rate": 2.8703703703703706e-05,
190
+ "loss": 2.2131,
191
+ "step": 290
192
  },
193
  {
194
  "epoch": 1.25,
195
  "learning_rate": 2.777777777777778e-05,
196
+ "loss": 2.2692,
197
+ "step": 300
198
  },
199
  {
200
  "epoch": 1.25,
201
+ "eval_accuracy": 0.32857142857142857,
202
+ "eval_loss": 1.802632212638855,
203
+ "eval_runtime": 4.9345,
204
+ "eval_samples_per_second": 14.186,
205
+ "eval_steps_per_second": 7.093,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 2.02,
210
+ "learning_rate": 2.6851851851851855e-05,
211
+ "loss": 2.2107,
212
+ "step": 310
213
  },
214
  {
215
  "epoch": 2.03,
216
  "learning_rate": 2.5925925925925925e-05,
217
+ "loss": 2.1825,
218
+ "step": 320
219
+ },
220
+ {
221
+ "epoch": 2.05,
222
+ "learning_rate": 2.5e-05,
223
+ "loss": 1.9586,
224
+ "step": 330
225
  },
226
  {
227
  "epoch": 2.07,
228
  "learning_rate": 2.4074074074074074e-05,
229
+ "loss": 1.7398,
230
+ "step": 340
231
+ },
232
+ {
233
+ "epoch": 2.08,
234
+ "learning_rate": 2.314814814814815e-05,
235
+ "loss": 2.3177,
236
+ "step": 350
237
  },
238
  {
239
  "epoch": 2.1,
240
  "learning_rate": 2.2222222222222223e-05,
241
+ "loss": 2.041,
242
+ "step": 360
243
+ },
244
+ {
245
+ "epoch": 2.12,
246
+ "learning_rate": 2.1296296296296296e-05,
247
+ "loss": 1.8122,
248
+ "step": 370
249
  },
250
  {
251
  "epoch": 2.13,
252
  "learning_rate": 2.037037037037037e-05,
253
+ "loss": 1.8686,
254
+ "step": 380
255
+ },
256
+ {
257
+ "epoch": 2.15,
258
+ "learning_rate": 1.9444444444444445e-05,
259
+ "loss": 1.6614,
260
+ "step": 390
261
  },
262
  {
263
  "epoch": 2.17,
264
  "learning_rate": 1.8518518518518518e-05,
265
+ "loss": 1.7773,
266
+ "step": 400
267
+ },
268
+ {
269
+ "epoch": 2.18,
270
+ "learning_rate": 1.7592592592592595e-05,
271
+ "loss": 1.6885,
272
+ "step": 410
273
  },
274
  {
275
  "epoch": 2.2,
276
  "learning_rate": 1.6666666666666667e-05,
277
+ "loss": 1.8929,
278
+ "step": 420
279
+ },
280
+ {
281
+ "epoch": 2.22,
282
+ "learning_rate": 1.574074074074074e-05,
283
+ "loss": 1.4253,
284
+ "step": 430
285
  },
286
  {
287
  "epoch": 2.23,
288
  "learning_rate": 1.4814814814814815e-05,
289
+ "loss": 2.0666,
290
+ "step": 440
291
  },
292
  {
293
  "epoch": 2.25,
294
+ "learning_rate": 1.388888888888889e-05,
295
+ "loss": 1.7863,
296
+ "step": 450
297
+ },
298
+ {
299
+ "epoch": 2.25,
300
+ "eval_accuracy": 0.38571428571428573,
301
+ "eval_loss": 1.4200152158737183,
302
+ "eval_runtime": 4.9616,
303
+ "eval_samples_per_second": 14.108,
304
+ "eval_steps_per_second": 7.054,
305
+ "step": 450
306
  },
307
  {
308
  "epoch": 3.02,
309
  "learning_rate": 1.2962962962962962e-05,
310
+ "loss": 1.486,
311
+ "step": 460
312
+ },
313
+ {
314
+ "epoch": 3.03,
315
+ "learning_rate": 1.2037037037037037e-05,
316
+ "loss": 1.4733,
317
+ "step": 470
318
  },
319
  {
320
  "epoch": 3.05,
321
  "learning_rate": 1.1111111111111112e-05,
322
+ "loss": 1.6414,
323
+ "step": 480
324
+ },
325
+ {
326
+ "epoch": 3.07,
327
+ "learning_rate": 1.0185185185185185e-05,
328
+ "loss": 1.691,
329
+ "step": 490
330
  },
331
  {
332
  "epoch": 3.08,
333
  "learning_rate": 9.259259259259259e-06,
334
+ "loss": 1.6061,
335
+ "step": 500
336
+ },
337
+ {
338
+ "epoch": 3.1,
339
+ "learning_rate": 8.333333333333334e-06,
340
+ "loss": 1.7164,
341
+ "step": 510
342
  },
343
  {
344
  "epoch": 3.12,
345
  "learning_rate": 7.4074074074074075e-06,
346
+ "loss": 1.5214,
347
+ "step": 520
348
+ },
349
+ {
350
+ "epoch": 3.13,
351
+ "learning_rate": 6.481481481481481e-06,
352
+ "loss": 1.6721,
353
+ "step": 530
354
  },
355
  {
356
  "epoch": 3.15,
357
  "learning_rate": 5.555555555555556e-06,
358
+ "loss": 1.4644,
359
+ "step": 540
360
+ },
361
+ {
362
+ "epoch": 3.17,
363
+ "learning_rate": 4.6296296296296296e-06,
364
+ "loss": 1.5493,
365
+ "step": 550
366
  },
367
  {
368
  "epoch": 3.18,
369
  "learning_rate": 3.7037037037037037e-06,
370
+ "loss": 1.4737,
371
+ "step": 560
372
+ },
373
+ {
374
+ "epoch": 3.2,
375
+ "learning_rate": 2.777777777777778e-06,
376
+ "loss": 1.3461,
377
+ "step": 570
378
  },
379
  {
380
  "epoch": 3.22,
381
  "learning_rate": 1.8518518518518519e-06,
382
+ "loss": 1.3977,
383
+ "step": 580
384
+ },
385
+ {
386
+ "epoch": 3.23,
387
+ "learning_rate": 9.259259259259259e-07,
388
+ "loss": 1.4364,
389
+ "step": 590
390
  },
391
  {
392
  "epoch": 3.25,
393
  "learning_rate": 0.0,
394
+ "loss": 1.5778,
395
+ "step": 600
396
  },
397
  {
398
  "epoch": 3.25,
399
+ "eval_accuracy": 0.45714285714285713,
400
+ "eval_loss": 1.2338804006576538,
401
+ "eval_runtime": 5.037,
402
+ "eval_samples_per_second": 13.897,
403
+ "eval_steps_per_second": 6.949,
404
+ "step": 600
405
  },
406
  {
407
  "epoch": 3.25,
408
+ "step": 600,
409
  "total_flos": 1.495384188125184e+18,
410
+ "train_loss": 2.002050215403239,
411
+ "train_runtime": 261.8703,
412
+ "train_samples_per_second": 4.582,
413
+ "train_steps_per_second": 2.291
414
  },
415
  {
416
  "epoch": 3.25,
417
+ "eval_accuracy": 0.6387096774193548,
418
+ "eval_loss": 1.1570727825164795,
419
+ "eval_runtime": 11.1054,
420
+ "eval_samples_per_second": 13.957,
421
+ "eval_steps_per_second": 7.024,
422
+ "step": 600
423
  },
424
  {
425
  "epoch": 3.25,
426
+ "eval_accuracy": 0.6387096774193548,
427
+ "eval_loss": 1.15707266330719,
428
+ "eval_runtime": 11.086,
429
+ "eval_samples_per_second": 13.982,
430
+ "eval_steps_per_second": 7.036,
431
+ "step": 600
432
  }
433
  ],
434
+ "max_steps": 600,
435
  "num_train_epochs": 9223372036854775807,
436
  "total_flos": 1.495384188125184e+18,
437
  "trial_name": null,