OmAlve committed on
Commit 934882f
1 Parent(s): 26a9a86

better evaluation and only trained the classifier layer

README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on the pcuenq/oxford-pets dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.0058
- - Accuracy: 0.9988
+ - Loss: 0.3168
+ - Accuracy: 0.9432
 
 ## Model description
 
@@ -39,27 +39,28 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0003
- - train_batch_size: 64
- - eval_batch_size: 8
+ - train_batch_size: 128
+ - eval_batch_size: 16
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
- - num_epochs: 4
+ - num_epochs: 5
 - mixed_precision_training: Native AMP
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
- | 0.3713 | 0.86 | 100 | 0.2084 | 0.9307 |
- | 0.1173 | 1.72 | 200 | 0.0774 | 0.9763 |
- | 0.0612 | 2.59 | 300 | 0.0212 | 0.9947 |
- | 0.007 | 3.45 | 400 | 0.0058 | 0.9988 |
+ | 1.5136 | 1.0 | 47 | 1.1031 | 0.8430 |
+ | 0.5547 | 2.0 | 94 | 0.5232 | 0.9269 |
+ | 0.4111 | 3.0 | 141 | 0.3988 | 0.9310 |
+ | 0.3438 | 4.0 | 188 | 0.3553 | 0.9337 |
+ | 0.298 | 5.0 | 235 | 0.3448 | 0.9296 |
 
 
 ### Framework versions
 
 - Transformers 4.39.2
- - Pytorch 2.2.1+cu121
+ - Pytorch 2.1.2
 - Datasets 2.16.0
 - Tokenizers 0.15.2

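The commit message says only the classifier layer was trained. As a rough illustration of what that setup could look like with the hyperparameters recorded in the README diff above, here is a minimal, hypothetical sketch (dataset preparation and metric computation are omitted, and none of this is taken from the author's actual training code):

```python
# Hypothetical sketch of a classifier-only fine-tune using the hyperparameters
# listed in the README diff above; not the author's actual training script.
from transformers import AutoModelForImageClassification, Trainer, TrainingArguments

model = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=37,                 # Oxford-IIIT Pets has 37 breeds
    ignore_mismatched_sizes=True,  # replace the 1000-class ImageNet head
)

# "only trained the classifier layer": freeze the ViT backbone so that only the
# freshly initialised classification head receives gradient updates.
for param in model.vit.parameters():
    param.requires_grad = False

args = TrainingArguments(
    output_dir="./vit-base-pets",
    learning_rate=3e-4,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    lr_scheduler_type="linear",
    seed=42,
    fp16=True,                     # "Native AMP" mixed precision
    evaluation_strategy="epoch",   # matches the per-epoch eval rows above
)

# trainer = Trainer(model=model, args=args, train_dataset=..., eval_dataset=...)
# trainer.train()
```

Training only the head is consistent with the shorter runtime in all_results.json below and with the lower final accuracy compared with the previous full fine-tune.
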
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
- "epoch": 4.0,
+ "epoch": 5.0,
 "total_flos": 2.2913817801515827e+18,
- "train_loss": 0.19263494449491003,
- "train_runtime": 813.2364,
- "train_samples_per_second": 36.349,
- "train_steps_per_second": 0.571
+ "train_loss": 0.8009341437765892,
+ "train_runtime": 407.3253,
+ "train_samples_per_second": 72.571,
+ "train_steps_per_second": 0.577
 }

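As a quick sanity check, the new throughput figures line up with the batch size and epoch count from the README diff; the per-epoch image count below is inferred from these numbers and is not stated anywhere in the commit:

```python
import math

# Values copied from the updated all_results.json and README diff above.
train_runtime = 407.3253           # seconds
samples_per_second = 72.571
num_epochs = 5
train_batch_size = 128

samples_seen = train_runtime * samples_per_second    # ~29,560 samples processed
images_per_epoch = samples_seen / num_epochs         # ~5,912 training images (inferred)
steps = num_epochs * math.ceil(images_per_epoch / train_batch_size)

print(round(samples_seen), round(images_per_epoch), steps)  # -> 29560 5912 235
```

The 235 steps match global_step and max_steps in the new trainer_state.json below.
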
config.json CHANGED
@@ -9,85 +9,85 @@
 "hidden_dropout_prob": 0.0,
 "hidden_size": 768,
 "id2label": {
- "0": "saint bernard dog",
- "1": "Ragdoll cat",
- "10": "keeshond dog",
- "11": "english cocker spaniel dog",
- "12": "beagle dog",
- "13": "Russian Blue cat",
- "14": "scottish terrier dog",
- "15": "newfoundland dog",
- "16": "Bombay cat",
- "17": "Bengal cat",
- "18": "japanese chin dog",
- "19": "Sphynx cat",
- "2": "havanese dog",
- "20": "Persian cat",
- "21": "shiba inu dog",
- "22": "english setter dog",
- "23": "great pyrenees dog",
- "24": "chihuahua dog",
- "25": "miniature pinscher dog",
- "26": "pomeranian dog",
- "27": "Abyssinian cat",
- "28": "basset hound dog",
- "29": "Birman cat",
- "3": "Egyptian Mau cat",
- "30": "german shorthaired dog",
- "31": "american bulldog dog",
- "32": "american pit bull terrier dog",
- "33": "staffordshire bull terrier dog",
- "34": "Maine Coon cat",
- "35": "samoyed dog",
+ "0": "american pit bull terrier dog",
+ "1": "Maine Coon cat",
+ "10": "german shorthaired dog",
+ "11": "Abyssinian cat",
+ "12": "havanese dog",
+ "13": "pomeranian dog",
+ "14": "newfoundland dog",
+ "15": "Egyptian Mau cat",
+ "16": "Russian Blue cat",
+ "17": "Bombay cat",
+ "18": "leonberger dog",
+ "19": "Persian cat",
+ "2": "english setter dog",
+ "20": "samoyed dog",
+ "21": "keeshond dog",
+ "22": "scottish terrier dog",
+ "23": "shiba inu dog",
+ "24": "staffordshire bull terrier dog",
+ "25": "basset hound dog",
+ "26": "Siamese cat",
+ "27": "pug dog",
+ "28": "miniature pinscher dog",
+ "29": "japanese chin dog",
+ "3": "Birman cat",
+ "30": "chihuahua dog",
+ "31": "Bengal cat",
+ "32": "beagle dog",
+ "33": "Sphynx cat",
+ "34": "saint bernard dog",
+ "35": "Ragdoll cat",
 "36": "boxer dog",
- "4": "Siamese cat",
- "5": "yorkshire terrier dog",
- "6": "leonberger dog",
- "7": "pug dog",
+ "4": "wheaten terrier dog",
+ "5": "great pyrenees dog",
+ "6": "american bulldog dog",
+ "7": "english cocker spaniel dog",
 "8": "British Shorthair cat",
- "9": "wheaten terrier dog"
+ "9": "yorkshire terrier dog"
 },
 "image_size": 224,
 "initializer_range": 0.02,
 "intermediate_size": 3072,
 "label2id": {
- "Abyssinian cat": "27",
- "Bengal cat": "17",
- "Birman cat": "29",
- "Bombay cat": "16",
+ "Abyssinian cat": "11",
+ "Bengal cat": "31",
+ "Birman cat": "3",
+ "Bombay cat": "17",
 "British Shorthair cat": "8",
- "Egyptian Mau cat": "3",
- "Maine Coon cat": "34",
- "Persian cat": "20",
- "Ragdoll cat": "1",
- "Russian Blue cat": "13",
- "Siamese cat": "4",
- "Sphynx cat": "19",
- "american bulldog dog": "31",
- "american pit bull terrier dog": "32",
- "basset hound dog": "28",
- "beagle dog": "12",
+ "Egyptian Mau cat": "15",
+ "Maine Coon cat": "1",
+ "Persian cat": "19",
+ "Ragdoll cat": "35",
+ "Russian Blue cat": "16",
+ "Siamese cat": "26",
+ "Sphynx cat": "33",
+ "american bulldog dog": "6",
+ "american pit bull terrier dog": "0",
+ "basset hound dog": "25",
+ "beagle dog": "32",
 "boxer dog": "36",
- "chihuahua dog": "24",
- "english cocker spaniel dog": "11",
- "english setter dog": "22",
- "german shorthaired dog": "30",
- "great pyrenees dog": "23",
- "havanese dog": "2",
- "japanese chin dog": "18",
- "keeshond dog": "10",
- "leonberger dog": "6",
- "miniature pinscher dog": "25",
- "newfoundland dog": "15",
- "pomeranian dog": "26",
- "pug dog": "7",
- "saint bernard dog": "0",
- "samoyed dog": "35",
- "scottish terrier dog": "14",
- "shiba inu dog": "21",
- "staffordshire bull terrier dog": "33",
- "wheaten terrier dog": "9",
- "yorkshire terrier dog": "5"
+ "chihuahua dog": "30",
+ "english cocker spaniel dog": "7",
+ "english setter dog": "2",
+ "german shorthaired dog": "10",
+ "great pyrenees dog": "5",
+ "havanese dog": "12",
+ "japanese chin dog": "29",
+ "keeshond dog": "21",
+ "leonberger dog": "18",
+ "miniature pinscher dog": "28",
+ "newfoundland dog": "14",
+ "pomeranian dog": "13",
+ "pug dog": "27",
+ "saint bernard dog": "34",
+ "samoyed dog": "20",
+ "scottish terrier dog": "22",
+ "shiba inu dog": "23",
+ "staffordshire bull terrier dog": "24",
+ "wheaten terrier dog": "4",
+ "yorkshire terrier dog": "9"
 },
 "layer_norm_eps": 1e-12,
 "model_type": "vit",

model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:7f4f4ab3ed13788b1a113a26071c629101eaba9b3e315e4735a8da6b713924ef
+ oid sha256:c481d24458fdc1fd579d745131513f9125c8e3b8fb0c33d566356465f8370611
 size 343331644

runs/Mar31_10-44-13_0de03c5233fc/events.out.tfevents.1711881913.0de03c5233fc.34.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04377611248f9ad9357d199307a6d9002f0980bb18705e47089639b077f7b8a5
+ size 13763

runs/Mar31_10-44-13_0de03c5233fc/events.out.tfevents.1711882353.0de03c5233fc.34.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a81816eb4f226e038038b0ed56ffb97533dac0ae477cba379146a719849d54c7
+ size 411

train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
- "epoch": 4.0,
+ "epoch": 5.0,
 "total_flos": 2.2913817801515827e+18,
- "train_loss": 0.19263494449491003,
- "train_runtime": 813.2364,
- "train_samples_per_second": 36.349,
- "train_steps_per_second": 0.571
+ "train_loss": 0.8009341437765892,
+ "train_runtime": 407.3253,
+ "train_samples_per_second": 72.571,
+ "train_steps_per_second": 0.577
 }

trainer_state.json CHANGED
@@ -1,388 +1,236 @@
 {
- "best_metric": 0.005833666305989027,
- "best_model_checkpoint": "./vit-base-pets/checkpoint-400",
- "epoch": 4.0,
- "eval_steps": 100,
- "global_step": 464,
+ "best_metric": 0.344835102558136,
+ "best_model_checkpoint": "./vit-base-pets/checkpoint-235",
+ "epoch": 5.0,
+ "eval_steps": 500,
+ "global_step": 235,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "log_history": [
 {
- "epoch": 0.09,
- "grad_norm": 3.28558611869812,
- "learning_rate": 0.00029353448275862065,
- "loss": 2.365,
+ "epoch": 0.21,
+ "grad_norm": 216203.125,
+ "learning_rate": 0.0002872340425531915,
+ "loss": 3.3311,
 "step": 10
 },
 {
- "epoch": 0.17,
- "grad_norm": 2.781508684158325,
- "learning_rate": 0.0002870689655172413,
- "loss": 0.7805,
+ "epoch": 0.43,
+ "grad_norm": 198156.9375,
+ "learning_rate": 0.000274468085106383,
+ "loss": 2.5921,
 "step": 20
 },
 {
- "epoch": 0.26,
- "grad_norm": 2.789832353591919,
- "learning_rate": 0.00028060344827586205,
- "loss": 0.4669,
+ "epoch": 0.64,
+ "grad_norm": 176661.6875,
+ "learning_rate": 0.0002617021276595745,
+ "loss": 1.9823,
 "step": 30
 },
 {
- "epoch": 0.34,
- "grad_norm": 3.0019114017486572,
- "learning_rate": 0.0002741379310344827,
- "loss": 0.4777,
+ "epoch": 0.85,
+ "grad_norm": 142275.75,
+ "learning_rate": 0.0002489361702127659,
+ "loss": 1.5136,
 "step": 40
 },
 {
- "epoch": 0.43,
- "grad_norm": 2.8678109645843506,
- "learning_rate": 0.00026767241379310345,
- "loss": 0.3825,
+ "epoch": 1.0,
+ "eval_accuracy": 0.8430311231393776,
+ "eval_loss": 1.1030857563018799,
+ "eval_runtime": 8.7542,
+ "eval_samples_per_second": 84.417,
+ "eval_steps_per_second": 5.369,
+ "step": 47
+ },
+ {
+ "epoch": 1.06,
+ "grad_norm": 124899.421875,
+ "learning_rate": 0.00023617021276595742,
+ "loss": 1.1858,
 "step": 50
 },
 {
- "epoch": 0.52,
- "grad_norm": 1.979690432548523,
- "learning_rate": 0.00026120689655172413,
- "loss": 0.3924,
+ "epoch": 1.28,
+ "grad_norm": 98946.875,
+ "learning_rate": 0.0002234042553191489,
+ "loss": 0.9313,
 "step": 60
 },
 {
- "epoch": 0.6,
- "grad_norm": 1.9895226955413818,
- "learning_rate": 0.0002547413793103448,
- "loss": 0.307,
+ "epoch": 1.49,
+ "grad_norm": 92924.6484375,
+ "learning_rate": 0.0002106382978723404,
+ "loss": 0.7466,
 "step": 70
 },
 {
- "epoch": 0.69,
- "grad_norm": 2.970583438873291,
- "learning_rate": 0.0002482758620689655,
- "loss": 0.4071,
+ "epoch": 1.7,
+ "grad_norm": 90032.1484375,
+ "learning_rate": 0.00019787234042553187,
+ "loss": 0.6475,
 "step": 80
 },
 {
- "epoch": 0.78,
- "grad_norm": 3.0911920070648193,
- "learning_rate": 0.00024181034482758618,
- "loss": 0.3594,
+ "epoch": 1.91,
+ "grad_norm": 68696.1875,
+ "learning_rate": 0.0001851063829787234,
+ "loss": 0.5547,
 "step": 90
 },
 {
- "epoch": 0.86,
- "grad_norm": 2.5045769214630127,
- "learning_rate": 0.00023534482758620685,
- "loss": 0.3713,
- "step": 100
+ "epoch": 2.0,
+ "eval_accuracy": 0.9269282814614344,
+ "eval_loss": 0.5232290625572205,
+ "eval_runtime": 8.9185,
+ "eval_samples_per_second": 82.861,
+ "eval_steps_per_second": 5.27,
+ "step": 94
 },
 {
- "epoch": 0.86,
- "eval_accuracy": 0.9307171853856563,
- "eval_loss": 0.20842242240905762,
- "eval_runtime": 68.2075,
- "eval_samples_per_second": 108.346,
- "eval_steps_per_second": 13.547,
+ "epoch": 2.13,
+ "grad_norm": 67699.609375,
+ "learning_rate": 0.0001723404255319149,
+ "loss": 0.5311,
 "step": 100
 },
 {
- "epoch": 0.95,
- "grad_norm": 1.4575306177139282,
- "learning_rate": 0.00022887931034482758,
- "loss": 0.3301,
+ "epoch": 2.34,
+ "grad_norm": 72000.0234375,
+ "learning_rate": 0.00015957446808510637,
+ "loss": 0.4636,
 "step": 110
 },
 {
- "epoch": 1.03,
- "grad_norm": 1.9479578733444214,
- "learning_rate": 0.00022241379310344826,
- "loss": 0.2504,
+ "epoch": 2.55,
+ "grad_norm": 54618.05078125,
+ "learning_rate": 0.00014680851063829785,
+ "loss": 0.4171,
 "step": 120
 },
 {
- "epoch": 1.12,
- "grad_norm": 1.73411226272583,
- "learning_rate": 0.00021594827586206896,
- "loss": 0.1697,
+ "epoch": 2.77,
+ "grad_norm": 57285.890625,
+ "learning_rate": 0.00013404255319148935,
+ "loss": 0.3946,
 "step": 130
 },
 {
- "epoch": 1.21,
- "grad_norm": 1.672253966331482,
- "learning_rate": 0.00020948275862068963,
- "loss": 0.116,
+ "epoch": 2.98,
+ "grad_norm": 73116.6171875,
+ "learning_rate": 0.00012127659574468084,
+ "loss": 0.4111,
 "step": 140
 },
 {
- "epoch": 1.29,
- "grad_norm": 2.3492820262908936,
- "learning_rate": 0.00020301724137931034,
- "loss": 0.158,
+ "epoch": 3.0,
+ "eval_accuracy": 0.9309878213802436,
+ "eval_loss": 0.39878538250923157,
+ "eval_runtime": 9.1048,
+ "eval_samples_per_second": 81.166,
+ "eval_steps_per_second": 5.162,
+ "step": 141
+ },
+ {
+ "epoch": 3.19,
+ "grad_norm": 64126.3828125,
+ "learning_rate": 0.00010851063829787234,
+ "loss": 0.3607,
 "step": 150
 },
 {
- "epoch": 1.38,
- "grad_norm": 1.4678938388824463,
- "learning_rate": 0.000196551724137931,
- "loss": 0.1487,
+ "epoch": 3.4,
+ "grad_norm": 69913.5390625,
+ "learning_rate": 9.574468085106382e-05,
+ "loss": 0.3387,
 "step": 160
 },
 {
- "epoch": 1.47,
- "grad_norm": 2.1428756713867188,
- "learning_rate": 0.00019008620689655169,
- "loss": 0.1121,
+ "epoch": 3.62,
+ "grad_norm": 54300.03125,
+ "learning_rate": 8.297872340425531e-05,
+ "loss": 0.3568,
 "step": 170
 },
 {
- "epoch": 1.55,
- "grad_norm": 1.255344271659851,
- "learning_rate": 0.0001836206896551724,
- "loss": 0.1548,
+ "epoch": 3.83,
+ "grad_norm": 60929.75390625,
+ "learning_rate": 7.02127659574468e-05,
+ "loss": 0.3438,
 "step": 180
 },
 {
- "epoch": 1.64,
- "grad_norm": 1.692832350730896,
- "learning_rate": 0.0001771551724137931,
- "loss": 0.0916,
+ "epoch": 4.0,
+ "eval_accuracy": 0.9336941813261164,
+ "eval_loss": 0.35527506470680237,
+ "eval_runtime": 9.4286,
+ "eval_samples_per_second": 78.378,
+ "eval_steps_per_second": 4.985,
+ "step": 188
+ },
+ {
+ "epoch": 4.04,
+ "grad_norm": 69279.328125,
+ "learning_rate": 5.7446808510638294e-05,
+ "loss": 0.3087,
 "step": 190
 },
 {
- "epoch": 1.72,
- "grad_norm": 2.322737693786621,
- "learning_rate": 0.0001706896551724138,
- "loss": 0.1173,
- "step": 200
- },
- {
- "epoch": 1.72,
- "eval_accuracy": 0.976319350473613,
- "eval_loss": 0.07739943265914917,
- "eval_runtime": 67.8967,
- "eval_samples_per_second": 108.842,
- "eval_steps_per_second": 13.609,
+ "epoch": 4.26,
+ "grad_norm": 49415.69140625,
+ "learning_rate": 4.468085106382978e-05,
+ "loss": 0.328,
 "step": 200
 },
 {
- "epoch": 1.81,
- "grad_norm": 1.99238920211792,
- "learning_rate": 0.00016422413793103446,
- "loss": 0.1311,
+ "epoch": 4.47,
+ "grad_norm": 62788.3359375,
+ "learning_rate": 3.1914893617021275e-05,
+ "loss": 0.3199,
 "step": 210
 },
 {
- "epoch": 1.9,
- "grad_norm": 2.3652477264404297,
- "learning_rate": 0.00015775862068965517,
- "loss": 0.1114,
+ "epoch": 4.68,
+ "grad_norm": 61852.7421875,
+ "learning_rate": 1.9148936170212762e-05,
+ "loss": 0.3244,
 "step": 220
 },
 {
- "epoch": 1.98,
- "grad_norm": 1.3925710916519165,
- "learning_rate": 0.00015129310344827584,
- "loss": 0.1235,
+ "epoch": 4.89,
+ "grad_norm": 64687.453125,
+ "learning_rate": 6.382978723404255e-06,
+ "loss": 0.298,
 "step": 230
 },
 {
- "epoch": 2.07,
- "grad_norm": 2.0290815830230713,
- "learning_rate": 0.00014482758620689654,
- "loss": 0.0772,
- "step": 240
- },
- {
- "epoch": 2.16,
- "grad_norm": 2.4121060371398926,
- "learning_rate": 0.00013836206896551724,
- "loss": 0.0715,
- "step": 250
- },
- {
- "epoch": 2.24,
- "grad_norm": 0.9658297300338745,
- "learning_rate": 0.00013189655172413792,
- "loss": 0.0444,
- "step": 260
- },
- {
- "epoch": 2.33,
- "grad_norm": 0.24860858917236328,
- "learning_rate": 0.0001254310344827586,
- "loss": 0.0668,
- "step": 270
- },
- {
- "epoch": 2.41,
- "grad_norm": 1.50627601146698,
- "learning_rate": 0.00011896551724137931,
- "loss": 0.034,
- "step": 280
- },
- {
- "epoch": 2.5,
- "grad_norm": 1.2053415775299072,
- "learning_rate": 0.0001125,
- "loss": 0.0489,
- "step": 290
- },
- {
- "epoch": 2.59,
- "grad_norm": 1.2974027395248413,
- "learning_rate": 0.00010603448275862067,
- "loss": 0.0612,
- "step": 300
- },
- {
- "epoch": 2.59,
- "eval_accuracy": 0.9947225981055481,
- "eval_loss": 0.021214015781879425,
- "eval_runtime": 67.3034,
- "eval_samples_per_second": 109.801,
- "eval_steps_per_second": 13.729,
- "step": 300
- },
- {
- "epoch": 2.67,
- "grad_norm": 0.22053079307079315,
- "learning_rate": 9.956896551724137e-05,
- "loss": 0.0308,
- "step": 310
- },
- {
- "epoch": 2.76,
- "grad_norm": 0.8180058002471924,
- "learning_rate": 9.310344827586206e-05,
- "loss": 0.03,
- "step": 320
- },
- {
- "epoch": 2.84,
- "grad_norm": 1.5855587720870972,
- "learning_rate": 8.663793103448275e-05,
- "loss": 0.0235,
- "step": 330
- },
- {
- "epoch": 2.93,
- "grad_norm": 1.7537671327590942,
- "learning_rate": 8.017241379310344e-05,
- "loss": 0.0225,
- "step": 340
- },
- {
- "epoch": 3.02,
- "grad_norm": 0.10956920683383942,
- "learning_rate": 7.370689655172413e-05,
- "loss": 0.0156,
- "step": 350
- },
- {
- "epoch": 3.1,
- "grad_norm": 1.1361974477767944,
- "learning_rate": 6.724137931034483e-05,
- "loss": 0.0094,
- "step": 360
- },
- {
- "epoch": 3.19,
- "grad_norm": 0.9139267802238464,
- "learning_rate": 6.077586206896551e-05,
- "loss": 0.0128,
- "step": 370
- },
- {
- "epoch": 3.28,
- "grad_norm": 0.12278908491134644,
- "learning_rate": 5.4310344827586204e-05,
- "loss": 0.0087,
- "step": 380
- },
- {
- "epoch": 3.36,
- "grad_norm": 0.0675448328256607,
- "learning_rate": 4.78448275862069e-05,
- "loss": 0.0048,
- "step": 390
- },
- {
- "epoch": 3.45,
- "grad_norm": 0.05717047303915024,
- "learning_rate": 4.137931034482758e-05,
- "loss": 0.007,
- "step": 400
+ "epoch": 5.0,
+ "eval_accuracy": 0.9296346414073072,
+ "eval_loss": 0.344835102558136,
+ "eval_runtime": 9.1923,
+ "eval_samples_per_second": 80.393,
+ "eval_steps_per_second": 5.113,
+ "step": 235
 },
 {
- "epoch": 3.45,
- "eval_accuracy": 0.9987821380243572,
- "eval_loss": 0.005833666305989027,
- "eval_runtime": 68.7353,
- "eval_samples_per_second": 107.514,
- "eval_steps_per_second": 13.443,
- "step": 400
- },
- {
- "epoch": 3.53,
- "grad_norm": 0.04572397843003273,
- "learning_rate": 3.4913793103448275e-05,
- "loss": 0.0051,
- "step": 410
- },
- {
- "epoch": 3.62,
- "grad_norm": 0.16931650042533875,
- "learning_rate": 2.8448275862068963e-05,
- "loss": 0.006,
- "step": 420
- },
- {
- "epoch": 3.71,
- "grad_norm": 0.041402418166399,
- "learning_rate": 2.198275862068965e-05,
- "loss": 0.0054,
- "step": 430
- },
- {
- "epoch": 3.79,
- "grad_norm": 0.02362515777349472,
- "learning_rate": 1.5517241379310342e-05,
- "loss": 0.0086,
- "step": 440
- },
- {
- "epoch": 3.88,
- "grad_norm": 0.3355587422847748,
- "learning_rate": 9.051724137931034e-06,
- "loss": 0.0039,
- "step": 450
- },
- {
- "epoch": 3.97,
- "grad_norm": 0.03504301235079765,
- "learning_rate": 2.5862068965517237e-06,
- "loss": 0.0135,
- "step": 460
- },
- {
- "epoch": 4.0,
- "step": 464,
+ "epoch": 5.0,
+ "step": 235,
 "total_flos": 2.2913817801515827e+18,
- "train_loss": 0.19263494449491003,
- "train_runtime": 813.2364,
- "train_samples_per_second": 36.349,
- "train_steps_per_second": 0.571
+ "train_loss": 0.8009341437765892,
+ "train_runtime": 407.3253,
+ "train_samples_per_second": 72.571,
+ "train_steps_per_second": 0.577
 }
 ],
 "logging_steps": 10,
- "max_steps": 464,
+ "max_steps": 235,
 "num_input_tokens_seen": 0,
- "num_train_epochs": 4,
- "save_steps": 100,
+ "num_train_epochs": 5,
+ "save_steps": 500,
 "total_flos": 2.2913817801515827e+18,
- "train_batch_size": 64,
+ "train_batch_size": 128,
 "trial_name": null,
 "trial_params": null
 }

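The per-epoch rows in the README's training-results table correspond to the eval entries in log_history above. A minimal sketch for pulling them back out of the committed trainer_state.json:

```python
# Minimal sketch: print the per-epoch eval records from trainer_state.json
# (these are the rows shown in the README table above).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_accuracy" in entry:  # eval records; training records carry "loss" instead
        print(f'epoch {entry["epoch"]:.1f}  step {entry["step"]:>3}  '
              f'eval_loss {entry["eval_loss"]:.4f}  accuracy {entry["eval_accuracy"]:.4f}')
```
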
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e2d9f5cc8c59e3763badc8aaac1a4d41fe76330287aca603b90141946989fe67
+ oid sha256:33d4c76de48bc1cfcdacd9758735c59ce75dd9bf02657f219a30684fa44e0d63
 size 4920