dima806 committed on
Commit
37794cc
1 Parent(s): 003b0f8

Upload folder using huggingface_hub

Browse files
checkpoint-15740/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "google/vit-base-patch16-224-in21k",
3
  "architectures": [
4
  "ViTForImageClassification"
5
  ],
@@ -38,5 +38,5 @@
38
  "problem_type": "single_label_classification",
39
  "qkv_bias": true,
40
  "torch_dtype": "float32",
41
- "transformers_version": "4.33.3"
42
  }
 
1
  {
2
+ "_name_or_path": "dima806/facial_emotions_image_detection",
3
  "architectures": [
4
  "ViTForImageClassification"
5
  ],
 
38
  "problem_type": "single_label_classification",
39
  "qkv_bias": true,
40
  "torch_dtype": "float32",
41
+ "transformers_version": "4.38.1"
42
  }
checkpoint-15740/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e55d8b7149fc6fab265cf018036ec58a4c63c32b4bfc25f5f48b6425bd5c6c
3
+ size 343239356
checkpoint-15740/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac054760eb7be1735767627048f8575bd48082e39a0b4d2c28b4cda232cf91db
3
  size 686599173
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed812d05d91fefab828d7a87a29455271710955ba68022c7b1c9148a838a1f9
3
  size 686599173
checkpoint-15740/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6da11932a00ba3c22d65cca3ce0eed71ee7fbb6229986476e7b37e57e16eb416
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:274e4012e8c090cea5b5dd5fc3caed1814ac9189d6b2fb69a1a8db44f9ac3827
3
  size 14575
checkpoint-15740/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5652bb62c654fcdb2e0edd17c5199c98b9cb10ced1a2e8ec21174e5de298f889
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e9f4dfba22da0c3088c8ec57bcf6afb6af38ac229cb650d1ecc4b65c60bb31
3
  size 627
checkpoint-15740/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5859972834587097,
3
  "best_model_checkpoint": "facial_emotions_image_detection/checkpoint-15740",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
@@ -10,286 +10,319 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
- "learning_rate": 9.7131931166348e-06,
14
- "loss": 1.5691,
 
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.64,
19
- "learning_rate": 9.394518801784576e-06,
20
- "loss": 1.1566,
 
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.95,
25
- "learning_rate": 9.075844486934355e-06,
26
- "loss": 1.005,
 
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 1.0,
31
- "eval_accuracy": 0.6603098927294399,
32
- "eval_loss": 1.0014679431915283,
33
- "eval_runtime": 113.4351,
34
- "eval_samples_per_second": 110.944,
35
- "eval_steps_per_second": 13.876,
36
  "step": 1574
37
  },
38
  {
39
  "epoch": 1.27,
40
- "learning_rate": 8.757170172084131e-06,
41
- "loss": 0.9148,
 
42
  "step": 2000
43
  },
44
  {
45
  "epoch": 1.59,
46
- "learning_rate": 8.438495857233908e-06,
47
- "loss": 0.8522,
 
48
  "step": 2500
49
  },
50
  {
51
  "epoch": 1.91,
52
- "learning_rate": 8.119821542383684e-06,
53
- "loss": 0.8063,
 
54
  "step": 3000
55
  },
56
  {
57
  "epoch": 2.0,
58
- "eval_accuracy": 0.7133889551052841,
59
- "eval_loss": 0.8330217003822327,
60
- "eval_runtime": 108.2894,
61
- "eval_samples_per_second": 116.216,
62
- "eval_steps_per_second": 14.535,
63
  "step": 3148
64
  },
65
  {
66
  "epoch": 2.22,
67
- "learning_rate": 7.80114722753346e-06,
68
- "loss": 0.7566,
 
69
  "step": 3500
70
  },
71
  {
72
  "epoch": 2.54,
73
- "learning_rate": 7.482472912683239e-06,
74
- "loss": 0.7097,
 
75
  "step": 4000
76
  },
77
  {
78
  "epoch": 2.86,
79
- "learning_rate": 7.163798597833015e-06,
80
- "loss": 0.7016,
 
81
  "step": 4500
82
  },
83
  {
84
  "epoch": 3.0,
85
- "eval_accuracy": 0.7423917361938817,
86
- "eval_loss": 0.7428602576255798,
87
- "eval_runtime": 107.858,
88
- "eval_samples_per_second": 116.681,
89
- "eval_steps_per_second": 14.593,
90
  "step": 4722
91
  },
92
  {
93
  "epoch": 3.18,
94
- "learning_rate": 6.8451242829827925e-06,
95
- "loss": 0.6608,
 
96
  "step": 5000
97
  },
98
  {
99
  "epoch": 3.49,
100
- "learning_rate": 6.526449968132569e-06,
101
- "loss": 0.6235,
 
102
  "step": 5500
103
  },
104
  {
105
  "epoch": 3.81,
106
- "learning_rate": 6.207775653282346e-06,
107
- "loss": 0.6138,
 
108
  "step": 6000
109
  },
110
  {
111
  "epoch": 4.0,
112
- "eval_accuracy": 0.7507350019864919,
113
- "eval_loss": 0.7060629725456238,
114
- "eval_runtime": 109.3785,
115
- "eval_samples_per_second": 115.059,
116
- "eval_steps_per_second": 14.39,
117
  "step": 6296
118
  },
119
  {
120
  "epoch": 4.13,
121
- "learning_rate": 5.889101338432123e-06,
122
- "loss": 0.5723,
 
123
  "step": 6500
124
  },
125
  {
126
  "epoch": 4.45,
127
- "learning_rate": 5.5704270235819e-06,
128
- "loss": 0.5429,
 
129
  "step": 7000
130
  },
131
  {
132
  "epoch": 4.76,
133
- "learning_rate": 5.251752708731677e-06,
134
- "loss": 0.5403,
 
135
  "step": 7500
136
  },
137
  {
138
  "epoch": 5.0,
139
- "eval_accuracy": 0.7726658720699245,
140
- "eval_loss": 0.6644242405891418,
141
- "eval_runtime": 108.6283,
142
- "eval_samples_per_second": 115.854,
143
- "eval_steps_per_second": 14.49,
144
  "step": 7870
145
  },
146
  {
147
  "epoch": 5.08,
148
- "learning_rate": 4.933078393881454e-06,
149
- "loss": 0.5112,
 
150
  "step": 8000
151
  },
152
  {
153
  "epoch": 5.4,
154
- "learning_rate": 4.61440407903123e-06,
155
- "loss": 0.4647,
 
156
  "step": 8500
157
  },
158
  {
159
  "epoch": 5.72,
160
- "learning_rate": 4.295729764181008e-06,
161
- "loss": 0.4584,
 
162
  "step": 9000
163
  },
164
  {
165
  "epoch": 6.0,
166
- "eval_accuracy": 0.7871275327771157,
167
- "eval_loss": 0.6358364820480347,
168
- "eval_runtime": 108.2696,
169
- "eval_samples_per_second": 116.238,
170
- "eval_steps_per_second": 14.538,
171
  "step": 9444
172
  },
173
  {
174
  "epoch": 6.04,
175
- "learning_rate": 3.977055449330784e-06,
176
- "loss": 0.4567,
 
177
  "step": 9500
178
  },
179
  {
180
  "epoch": 6.35,
181
- "learning_rate": 3.6583811344805616e-06,
182
- "loss": 0.4134,
 
183
  "step": 10000
184
  },
185
  {
186
  "epoch": 6.67,
187
- "learning_rate": 3.339706819630338e-06,
188
- "loss": 0.3957,
 
189
  "step": 10500
190
  },
191
  {
192
  "epoch": 6.99,
193
- "learning_rate": 3.021032504780115e-06,
194
- "loss": 0.3991,
 
195
  "step": 11000
196
  },
197
  {
198
  "epoch": 7.0,
199
- "eval_accuracy": 0.800238379022646,
200
- "eval_loss": 0.6062497496604919,
201
- "eval_runtime": 108.4345,
202
- "eval_samples_per_second": 116.061,
203
- "eval_steps_per_second": 14.516,
204
  "step": 11018
205
  },
206
  {
207
  "epoch": 7.31,
208
- "learning_rate": 2.702358189929892e-06,
209
- "loss": 0.3545,
 
210
  "step": 11500
211
  },
212
  {
213
  "epoch": 7.62,
214
- "learning_rate": 2.3836838750796687e-06,
215
- "loss": 0.3538,
 
216
  "step": 12000
217
  },
218
  {
219
  "epoch": 7.94,
220
- "learning_rate": 2.0650095602294456e-06,
221
- "loss": 0.3544,
 
222
  "step": 12500
223
  },
224
  {
225
  "epoch": 8.0,
226
- "eval_accuracy": 0.8045292014302742,
227
- "eval_loss": 0.6008493900299072,
228
- "eval_runtime": 110.3834,
229
- "eval_samples_per_second": 114.012,
230
- "eval_steps_per_second": 14.259,
231
  "step": 12592
232
  },
233
  {
234
  "epoch": 8.26,
235
- "learning_rate": 1.7463352453792225e-06,
236
- "loss": 0.3183,
 
237
  "step": 13000
238
  },
239
  {
240
  "epoch": 8.58,
241
- "learning_rate": 1.4276609305289997e-06,
242
- "loss": 0.3218,
 
243
  "step": 13500
244
  },
245
  {
246
  "epoch": 8.89,
247
- "learning_rate": 1.1089866156787763e-06,
248
- "loss": 0.3184,
 
249
  "step": 14000
250
  },
251
  {
252
  "epoch": 9.0,
253
- "eval_accuracy": 0.8116805721096544,
254
- "eval_loss": 0.590340256690979,
255
- "eval_runtime": 108.9419,
256
- "eval_samples_per_second": 115.52,
257
- "eval_steps_per_second": 14.448,
258
  "step": 14166
259
  },
260
  {
261
  "epoch": 9.21,
262
- "learning_rate": 7.903123008285534e-07,
263
- "loss": 0.3054,
 
264
  "step": 14500
265
  },
266
  {
267
  "epoch": 9.53,
268
- "learning_rate": 4.7163798597833015e-07,
269
- "loss": 0.2813,
 
270
  "step": 15000
271
  },
272
  {
273
  "epoch": 9.85,
274
- "learning_rate": 1.5296367112810708e-07,
275
- "loss": 0.2984,
 
276
  "step": 15500
277
  },
278
  {
279
  "epoch": 10.0,
280
- "eval_accuracy": 0.8162097735399285,
281
- "eval_loss": 0.5859972834587097,
282
- "eval_runtime": 111.4622,
283
- "eval_samples_per_second": 112.908,
284
- "eval_steps_per_second": 14.121,
285
  "step": 15740
286
  }
287
  ],
288
  "logging_steps": 500,
289
  "max_steps": 15740,
 
290
  "num_train_epochs": 10,
291
  "save_steps": 500,
292
  "total_flos": 3.900966581033497e+19,
 
293
  "trial_name": null,
294
  "trial_params": null
295
  }
 
1
  {
2
+ "best_metric": 0.3781418800354004,
3
  "best_model_checkpoint": "facial_emotions_image_detection/checkpoint-15740",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
+ "grad_norm": 11.892455101013184,
14
+ "learning_rate": 3.885277246653919e-06,
15
+ "loss": 0.5534,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.64,
20
+ "grad_norm": 12.262578964233398,
21
+ "learning_rate": 3.7578075207138302e-06,
22
+ "loss": 0.5424,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.95,
27
+ "grad_norm": 11.160516738891602,
28
+ "learning_rate": 3.630337794773741e-06,
29
+ "loss": 0.5402,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "eval_accuracy": 0.8678585617798967,
35
+ "eval_loss": 0.4141866862773895,
36
+ "eval_runtime": 113.7657,
37
+ "eval_samples_per_second": 110.622,
38
+ "eval_steps_per_second": 13.835,
39
  "step": 1574
40
  },
41
  {
42
  "epoch": 1.27,
43
+ "grad_norm": 12.081930160522461,
44
+ "learning_rate": 3.5028680688336517e-06,
45
+ "loss": 0.509,
46
  "step": 2000
47
  },
48
  {
49
  "epoch": 1.59,
50
+ "grad_norm": 8.023213386535645,
51
+ "learning_rate": 3.375398342893563e-06,
52
+ "loss": 0.4984,
53
  "step": 2500
54
  },
55
  {
56
  "epoch": 1.91,
57
+ "grad_norm": 11.671684265136719,
58
+ "learning_rate": 3.2479286169534735e-06,
59
+ "loss": 0.4817,
60
  "step": 3000
61
  },
62
  {
63
  "epoch": 2.0,
64
+ "eval_accuracy": 0.8661899086213747,
65
+ "eval_loss": 0.41461509466171265,
66
+ "eval_runtime": 111.4496,
67
+ "eval_samples_per_second": 112.921,
68
+ "eval_steps_per_second": 14.123,
69
  "step": 3148
70
  },
71
  {
72
  "epoch": 2.22,
73
+ "grad_norm": 11.465852737426758,
74
+ "learning_rate": 3.120458891013384e-06,
75
+ "loss": 0.4686,
76
  "step": 3500
77
  },
78
  {
79
  "epoch": 2.54,
80
+ "grad_norm": 13.02270221710205,
81
+ "learning_rate": 2.992989165073295e-06,
82
+ "loss": 0.451,
83
  "step": 4000
84
  },
85
  {
86
  "epoch": 2.86,
87
+ "grad_norm": 7.672973155975342,
88
+ "learning_rate": 2.8655194391332055e-06,
89
+ "loss": 0.4459,
90
  "step": 4500
91
  },
92
  {
93
  "epoch": 3.0,
94
+ "eval_accuracy": 0.8698450536352801,
95
+ "eval_loss": 0.40479913353919983,
96
+ "eval_runtime": 111.5158,
97
+ "eval_samples_per_second": 112.854,
98
+ "eval_steps_per_second": 14.115,
99
  "step": 4722
100
  },
101
  {
102
  "epoch": 3.18,
103
+ "grad_norm": 12.763694763183594,
104
+ "learning_rate": 2.7380497131931164e-06,
105
+ "loss": 0.4335,
106
  "step": 5000
107
  },
108
  {
109
  "epoch": 3.49,
110
+ "grad_norm": 10.964895248413086,
111
+ "learning_rate": 2.6105799872530273e-06,
112
+ "loss": 0.4325,
113
  "step": 5500
114
  },
115
  {
116
  "epoch": 3.81,
117
+ "grad_norm": 12.650814056396484,
118
+ "learning_rate": 2.4831102613129383e-06,
119
+ "loss": 0.4031,
120
  "step": 6000
121
  },
122
  {
123
  "epoch": 4.0,
124
+ "eval_accuracy": 0.8707985697258641,
125
+ "eval_loss": 0.3987765610218048,
126
+ "eval_runtime": 110.9162,
127
+ "eval_samples_per_second": 113.464,
128
+ "eval_steps_per_second": 14.191,
129
  "step": 6296
130
  },
131
  {
132
  "epoch": 4.13,
133
+ "grad_norm": 7.15053653717041,
134
+ "learning_rate": 2.3556405353728488e-06,
135
+ "loss": 0.4112,
136
  "step": 6500
137
  },
138
  {
139
  "epoch": 4.45,
140
+ "grad_norm": 17.31258773803711,
141
+ "learning_rate": 2.2281708094327597e-06,
142
+ "loss": 0.3886,
143
  "step": 7000
144
  },
145
  {
146
  "epoch": 4.76,
147
+ "grad_norm": 7.938920497894287,
148
+ "learning_rate": 2.1007010834926702e-06,
149
+ "loss": 0.3804,
150
  "step": 7500
151
  },
152
  {
153
  "epoch": 5.0,
154
+ "eval_accuracy": 0.8742947953913389,
155
+ "eval_loss": 0.39545899629592896,
156
+ "eval_runtime": 110.9484,
157
+ "eval_samples_per_second": 113.431,
158
+ "eval_steps_per_second": 14.187,
159
  "step": 7870
160
  },
161
  {
162
  "epoch": 5.08,
163
+ "grad_norm": 11.187897682189941,
164
+ "learning_rate": 1.973231357552581e-06,
165
+ "loss": 0.3856,
166
  "step": 8000
167
  },
168
  {
169
  "epoch": 5.4,
170
+ "grad_norm": 12.73786735534668,
171
+ "learning_rate": 1.845761631612492e-06,
172
+ "loss": 0.3732,
173
  "step": 8500
174
  },
175
  {
176
  "epoch": 5.72,
177
+ "grad_norm": 6.2277374267578125,
178
+ "learning_rate": 1.7182919056724028e-06,
179
+ "loss": 0.3733,
180
  "step": 9000
181
  },
182
  {
183
  "epoch": 6.0,
184
+ "eval_accuracy": 0.8767580452920143,
185
+ "eval_loss": 0.3847721815109253,
186
+ "eval_runtime": 113.9748,
187
+ "eval_samples_per_second": 110.419,
188
+ "eval_steps_per_second": 13.81,
189
  "step": 9444
190
  },
191
  {
192
  "epoch": 6.04,
193
+ "grad_norm": 9.49077320098877,
194
+ "learning_rate": 1.5908221797323135e-06,
195
+ "loss": 0.3552,
196
  "step": 9500
197
  },
198
  {
199
  "epoch": 6.35,
200
+ "grad_norm": 11.691384315490723,
201
+ "learning_rate": 1.4633524537922245e-06,
202
+ "loss": 0.355,
203
  "step": 10000
204
  },
205
  {
206
  "epoch": 6.67,
207
+ "grad_norm": 7.24980354309082,
208
+ "learning_rate": 1.335882727852135e-06,
209
+ "loss": 0.347,
210
  "step": 10500
211
  },
212
  {
213
  "epoch": 6.99,
214
+ "grad_norm": 15.983756065368652,
215
+ "learning_rate": 1.2084130019120457e-06,
216
+ "loss": 0.3462,
217
  "step": 11000
218
  },
219
  {
220
  "epoch": 7.0,
221
+ "eval_accuracy": 0.877870480731029,
222
+ "eval_loss": 0.38435637950897217,
223
+ "eval_runtime": 114.6747,
224
+ "eval_samples_per_second": 109.745,
225
+ "eval_steps_per_second": 13.726,
226
  "step": 11018
227
  },
228
  {
229
  "epoch": 7.31,
230
+ "grad_norm": 6.091674327850342,
231
+ "learning_rate": 1.0809432759719566e-06,
232
+ "loss": 0.3481,
233
  "step": 11500
234
  },
235
  {
236
  "epoch": 7.62,
237
+ "grad_norm": 14.106987953186035,
238
+ "learning_rate": 9.534735500318673e-07,
239
+ "loss": 0.3373,
240
  "step": 12000
241
  },
242
  {
243
  "epoch": 7.94,
244
+ "grad_norm": 8.448904991149902,
245
+ "learning_rate": 8.260038240917782e-07,
246
+ "loss": 0.324,
247
  "step": 12500
248
  },
249
  {
250
  "epoch": 8.0,
251
+ "eval_accuracy": 0.876201827572507,
252
+ "eval_loss": 0.3851085305213928,
253
+ "eval_runtime": 113.668,
254
+ "eval_samples_per_second": 110.717,
255
+ "eval_steps_per_second": 13.847,
256
  "step": 12592
257
  },
258
  {
259
  "epoch": 8.26,
260
+ "grad_norm": 6.959225177764893,
261
+ "learning_rate": 6.98534098151689e-07,
262
+ "loss": 0.3254,
263
  "step": 13000
264
  },
265
  {
266
  "epoch": 8.58,
267
+ "grad_norm": 10.736397743225098,
268
+ "learning_rate": 5.710643722115998e-07,
269
+ "loss": 0.3179,
270
  "step": 13500
271
  },
272
  {
273
  "epoch": 8.89,
274
+ "grad_norm": 10.716438293457031,
275
+ "learning_rate": 4.435946462715105e-07,
276
+ "loss": 0.3217,
277
  "step": 14000
278
  },
279
  {
280
  "epoch": 9.0,
281
+ "eval_accuracy": 0.8787445371473976,
282
+ "eval_loss": 0.3801954388618469,
283
+ "eval_runtime": 113.1501,
284
+ "eval_samples_per_second": 111.224,
285
+ "eval_steps_per_second": 13.911,
286
  "step": 14166
287
  },
288
  {
289
  "epoch": 9.21,
290
+ "grad_norm": 12.26369571685791,
291
+ "learning_rate": 3.161249203314213e-07,
292
+ "loss": 0.309,
293
  "step": 14500
294
  },
295
  {
296
  "epoch": 9.53,
297
+ "grad_norm": 6.435930252075195,
298
+ "learning_rate": 1.8865519439133203e-07,
299
+ "loss": 0.3158,
300
  "step": 15000
301
  },
302
  {
303
  "epoch": 9.85,
304
+ "grad_norm": 15.131240844726562,
305
+ "learning_rate": 6.118546845124282e-08,
306
+ "loss": 0.3105,
307
  "step": 15500
308
  },
309
  {
310
  "epoch": 10.0,
311
+ "eval_accuracy": 0.8789829161700436,
312
+ "eval_loss": 0.3781418800354004,
313
+ "eval_runtime": 112.3381,
314
+ "eval_samples_per_second": 112.028,
315
+ "eval_steps_per_second": 14.011,
316
  "step": 15740
317
  }
318
  ],
319
  "logging_steps": 500,
320
  "max_steps": 15740,
321
+ "num_input_tokens_seen": 0,
322
  "num_train_epochs": 10,
323
  "save_steps": 500,
324
  "total_flos": 3.900966581033497e+19,
325
+ "train_batch_size": 32,
326
  "trial_name": null,
327
  "trial_params": null
328
  }
checkpoint-15740/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88f8d355cab9ef739f5fc476aba7f6e893c6c8f92be3fff90b9d4091956a9dd9
3
- size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:412104b61f174a6b32d7fb7f0dee0c8ea25fe50495d4c2050346fa634d05d144
3
+ size 4411
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6c60a65625a5858cb798dc3334d437083ee9b580c83d48a51f373240a7a1787
3
  size 343239356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e55d8b7149fc6fab265cf018036ec58a4c63c32b4bfc25f5f48b6425bd5c6c
3
  size 343239356
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4a27ac1cf21ac69961911d3f69cc32618d3c82fd492d9d41337dd8d2d59a7a0
3
  size 4411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:412104b61f174a6b32d7fb7f0dee0c8ea25fe50495d4c2050346fa634d05d144
3
  size 4411