.gitignore DELETED
@@ -1 +0,0 @@
1
- checkpoint-*/
 
 
README.md DELETED
@@ -1,87 +0,0 @@
1
- ---
2
- license: apache-2.0
3
- tags:
4
- - image-classification
5
- - generated_from_trainer
6
- datasets:
7
- - snacks
8
- metrics:
9
- - accuracy
10
- model-index:
11
- - name: vit-snacks
12
- results:
13
- - task:
14
- name: Image Classification
15
- type: image-classification
16
- dataset:
17
- name: Matthijs/snacks
18
- type: snacks
19
- args: default
20
- metrics:
21
- - name: Accuracy
22
- type: accuracy
23
- value: 0.9392670157068063
24
- ---
25
-
26
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
27
- should probably proofread and complete it, then remove this comment. -->
28
-
29
- # vit-snacks
30
-
31
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the Matthijs/snacks dataset.
32
- It achieves the following results on the evaluation set:
33
- - Loss: 0.2754
34
- - Accuracy: 0.9393
35
-
36
- ## Model description
37
-
38
- Upload an image of your favorite snack and the model will predict which kind of snack it shows.
39
-
40
- ## Intended uses & limitations
41
-
42
- The model only recognizes 20 different varieties of snacks, so it cannot correctly label anything outside those classes.
43
-
44
- ## Training and evaluation data
45
-
46
- More information needed
47
-
48
- ## Training procedure
49
-
50
- ### Training hyperparameters
51
-
52
- The following hyperparameters were used during training:
53
- - learning_rate: 0.0002
54
- - train_batch_size: 16
55
- - eval_batch_size: 8
56
- - seed: 42
57
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
- - lr_scheduler_type: linear
59
- - num_epochs: 5
60
-
61
- ### Training results
62
-
63
- | Training Loss | Epoch | Step | Validation Loss | Accuracy |
64
- |:-------------:|:-----:|:----:|:---------------:|:--------:|
65
- | 0.8724 | 0.33 | 100 | 0.9118 | 0.8670 |
66
- | 0.5628 | 0.66 | 200 | 0.6873 | 0.8471 |
67
- | 0.4421 | 0.99 | 300 | 0.4995 | 0.8691 |
68
- | 0.2837 | 1.32 | 400 | 0.4008 | 0.9026 |
69
- | 0.1645 | 1.65 | 500 | 0.3702 | 0.9058 |
70
- | 0.1604 | 1.98 | 600 | 0.3981 | 0.8921 |
71
- | 0.0498 | 2.31 | 700 | 0.3185 | 0.9204 |
72
- | 0.0406 | 2.64 | 800 | 0.3427 | 0.9141 |
73
- | 0.1049 | 2.97 | 900 | 0.3444 | 0.9173 |
74
- | 0.0272 | 3.3 | 1000 | 0.3168 | 0.9246 |
75
- | 0.0186 | 3.63 | 1100 | 0.3142 | 0.9288 |
76
- | 0.0203 | 3.96 | 1200 | 0.2931 | 0.9298 |
77
- | 0.007 | 4.29 | 1300 | 0.2754 | 0.9393 |
78
- | 0.0072 | 4.62 | 1400 | 0.2778 | 0.9403 |
79
- | 0.0073 | 4.95 | 1500 | 0.2782 | 0.9393 |
80
-
81
-
82
- ### Framework versions
83
-
84
- - Transformers 4.20.1
85
- - Pytorch 1.11.0+cu113
86
- - Datasets 2.3.2
87
- - Tokenizers 0.12.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
all_results.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.9392670157068063,
4
- "eval_loss": 0.27537932991981506,
5
- "eval_runtime": 14.5416,
6
- "eval_samples_per_second": 65.674,
7
- "eval_steps_per_second": 8.252,
8
- "total_flos": 1.874833643725701e+18,
9
- "train_loss": 0.2613857026712926,
10
- "train_runtime": 1218.6852,
11
- "train_samples_per_second": 19.849,
12
- "train_steps_per_second": 1.243
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,68 +0,0 @@
1
- {
2
- "_name_or_path": "google/vit-base-patch16-224-in21k",
3
- "architectures": [
4
- "ViTForImageClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.0,
7
- "encoder_stride": 16,
8
- "hidden_act": "gelu",
9
- "hidden_dropout_prob": 0.0,
10
- "hidden_size": 768,
11
- "id2label": {
12
- "0": "apple",
13
- "1": "banana",
14
- "10": "juice",
15
- "11": "muffin",
16
- "12": "orange",
17
- "13": "pineapple",
18
- "14": "popcorn",
19
- "15": "pretzel",
20
- "16": "salad",
21
- "17": "strawberry",
22
- "18": "waffle",
23
- "19": "watermelon",
24
- "2": "cake",
25
- "3": "candy",
26
- "4": "carrot",
27
- "5": "cookie",
28
- "6": "doughnut",
29
- "7": "grape",
30
- "8": "hot dog",
31
- "9": "ice cream"
32
- },
33
- "image_size": 224,
34
- "initializer_range": 0.02,
35
- "intermediate_size": 3072,
36
- "label2id": {
37
- "apple": "0",
38
- "banana": "1",
39
- "cake": "2",
40
- "candy": "3",
41
- "carrot": "4",
42
- "cookie": "5",
43
- "doughnut": "6",
44
- "grape": "7",
45
- "hot dog": "8",
46
- "ice cream": "9",
47
- "juice": "10",
48
- "muffin": "11",
49
- "orange": "12",
50
- "pineapple": "13",
51
- "popcorn": "14",
52
- "pretzel": "15",
53
- "salad": "16",
54
- "strawberry": "17",
55
- "waffle": "18",
56
- "watermelon": "19"
57
- },
58
- "layer_norm_eps": 1e-12,
59
- "model_type": "vit",
60
- "num_attention_heads": 12,
61
- "num_channels": 3,
62
- "num_hidden_layers": 12,
63
- "patch_size": 16,
64
- "problem_type": "single_label_classification",
65
- "qkv_bias": true,
66
- "torch_dtype": "float32",
67
- "transformers_version": "4.20.1"
68
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.9392670157068063,
4
- "eval_loss": 0.27537932991981506,
5
- "eval_runtime": 14.5416,
6
- "eval_samples_per_second": 65.674,
7
- "eval_steps_per_second": 8.252
8
- }
 
 
 
 
 
 
 
 
 
preprocessor_config.json DELETED
@@ -1,17 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "do_resize": true,
4
- "feature_extractor_type": "ViTFeatureExtractor",
5
- "image_mean": [
6
- 0.5,
7
- 0.5,
8
- 0.5
9
- ],
10
- "image_std": [
11
- 0.5,
12
- 0.5,
13
- 0.5
14
- ],
15
- "resample": 2,
16
- "size": 224
17
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e13ac40790f2c68d2f0040930967454da6f0a23fe5b40dd029e70416b25bab4f
3
- size 343322353
 
 
 
 
runs/Jun30_06-08-28_fbcba3ddf03a/1656569334.957422/events.out.tfevents.1656569334.fbcba3ddf03a.74.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:efc771484cb4ea5464092ee3c0af976cb72257ebf94f7dd6d900782bd0cca03b
3
- size 5308
 
 
 
 
runs/Jun30_06-08-28_fbcba3ddf03a/events.out.tfevents.1656569334.fbcba3ddf03a.74.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eac6669ca35087a5296a9b43f62e5ac88bf426ee90476aaeab42219062a6020c
3
- size 33168
 
 
 
 
runs/Jun30_06-08-28_fbcba3ddf03a/events.out.tfevents.1656570869.fbcba3ddf03a.74.2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:978035f66abf3fc6e6e6e9c6ed511321ce64c47f4b0db5444f46c155244c3a10
3
- size 363
 
 
 
 
train_results.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "epoch": 5.0,
3
- "total_flos": 1.874833643725701e+18,
4
- "train_loss": 0.2613857026712926,
5
- "train_runtime": 1218.6852,
6
- "train_samples_per_second": 19.849,
7
- "train_steps_per_second": 1.243
8
- }
 
 
 
 
 
 
 
 
 
trainer_state.json DELETED
@@ -1,1066 +0,0 @@
1
- {
2
- "best_metric": 0.27537932991981506,
3
- "best_model_checkpoint": "vit-snacks/checkpoint-1300",
4
- "epoch": 5.0,
5
- "global_step": 1515,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 0.0001986798679867987,
13
- "loss": 2.8877,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.07,
18
- "learning_rate": 0.00019735973597359737,
19
- "loss": 2.6212,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.1,
24
- "learning_rate": 0.00019603960396039606,
25
- "loss": 2.3151,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.13,
30
- "learning_rate": 0.00019471947194719472,
31
- "loss": 1.9591,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.17,
36
- "learning_rate": 0.0001933993399339934,
37
- "loss": 1.7106,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.2,
42
- "learning_rate": 0.00019207920792079208,
43
- "loss": 1.367,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.23,
48
- "learning_rate": 0.00019075907590759077,
49
- "loss": 1.2285,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.26,
54
- "learning_rate": 0.00018943894389438946,
55
- "loss": 1.0703,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.3,
60
- "learning_rate": 0.00018811881188118812,
61
- "loss": 1.1937,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.33,
66
- "learning_rate": 0.00018679867986798681,
67
- "loss": 0.8724,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 0.33,
72
- "eval_accuracy": 0.8670157068062827,
73
- "eval_loss": 0.9118377566337585,
74
- "eval_runtime": 14.6857,
75
- "eval_samples_per_second": 65.029,
76
- "eval_steps_per_second": 8.171,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.36,
81
- "learning_rate": 0.00018547854785478548,
82
- "loss": 0.9309,
83
- "step": 110
84
- },
85
- {
86
- "epoch": 0.4,
87
- "learning_rate": 0.00018415841584158417,
88
- "loss": 0.8163,
89
- "step": 120
90
- },
91
- {
92
- "epoch": 0.43,
93
- "learning_rate": 0.00018283828382838283,
94
- "loss": 0.7416,
95
- "step": 130
96
- },
97
- {
98
- "epoch": 0.46,
99
- "learning_rate": 0.00018151815181518153,
100
- "loss": 0.6442,
101
- "step": 140
102
- },
103
- {
104
- "epoch": 0.5,
105
- "learning_rate": 0.00018019801980198022,
106
- "loss": 0.7906,
107
- "step": 150
108
- },
109
- {
110
- "epoch": 0.53,
111
- "learning_rate": 0.0001788778877887789,
112
- "loss": 0.6137,
113
- "step": 160
114
- },
115
- {
116
- "epoch": 0.56,
117
- "learning_rate": 0.00017755775577557757,
118
- "loss": 0.6342,
119
- "step": 170
120
- },
121
- {
122
- "epoch": 0.59,
123
- "learning_rate": 0.00017623762376237624,
124
- "loss": 0.7081,
125
- "step": 180
126
- },
127
- {
128
- "epoch": 0.63,
129
- "learning_rate": 0.00017491749174917493,
130
- "loss": 0.51,
131
- "step": 190
132
- },
133
- {
134
- "epoch": 0.66,
135
- "learning_rate": 0.0001735973597359736,
136
- "loss": 0.5628,
137
- "step": 200
138
- },
139
- {
140
- "epoch": 0.66,
141
- "eval_accuracy": 0.8471204188481676,
142
- "eval_loss": 0.6873391270637512,
143
- "eval_runtime": 16.1319,
144
- "eval_samples_per_second": 59.2,
145
- "eval_steps_per_second": 7.439,
146
- "step": 200
147
- },
148
- {
149
- "epoch": 0.69,
150
- "learning_rate": 0.00017227722772277228,
151
- "loss": 0.6635,
152
- "step": 210
153
- },
154
- {
155
- "epoch": 0.73,
156
- "learning_rate": 0.00017095709570957098,
157
- "loss": 0.51,
158
- "step": 220
159
- },
160
- {
161
- "epoch": 0.76,
162
- "learning_rate": 0.00016963696369636967,
163
- "loss": 0.5002,
164
- "step": 230
165
- },
166
- {
167
- "epoch": 0.79,
168
- "learning_rate": 0.00016831683168316833,
169
- "loss": 0.6353,
170
- "step": 240
171
- },
172
- {
173
- "epoch": 0.83,
174
- "learning_rate": 0.000166996699669967,
175
- "loss": 0.5326,
176
- "step": 250
177
- },
178
- {
179
- "epoch": 0.86,
180
- "learning_rate": 0.00016567656765676569,
181
- "loss": 0.5051,
182
- "step": 260
183
- },
184
- {
185
- "epoch": 0.89,
186
- "learning_rate": 0.00016435643564356435,
187
- "loss": 0.6527,
188
- "step": 270
189
- },
190
- {
191
- "epoch": 0.92,
192
- "learning_rate": 0.00016303630363036304,
193
- "loss": 0.4961,
194
- "step": 280
195
- },
196
- {
197
- "epoch": 0.96,
198
- "learning_rate": 0.00016171617161716173,
199
- "loss": 0.4981,
200
- "step": 290
201
- },
202
- {
203
- "epoch": 0.99,
204
- "learning_rate": 0.00016039603960396042,
205
- "loss": 0.4421,
206
- "step": 300
207
- },
208
- {
209
- "epoch": 0.99,
210
- "eval_accuracy": 0.8691099476439791,
211
- "eval_loss": 0.4995421767234802,
212
- "eval_runtime": 15.1178,
213
- "eval_samples_per_second": 63.17,
214
- "eval_steps_per_second": 7.938,
215
- "step": 300
216
- },
217
- {
218
- "epoch": 1.02,
219
- "learning_rate": 0.0001590759075907591,
220
- "loss": 0.4434,
221
- "step": 310
222
- },
223
- {
224
- "epoch": 1.06,
225
- "learning_rate": 0.00015775577557755775,
226
- "loss": 0.2569,
227
- "step": 320
228
- },
229
- {
230
- "epoch": 1.09,
231
- "learning_rate": 0.00015643564356435644,
232
- "loss": 0.2208,
233
- "step": 330
234
- },
235
- {
236
- "epoch": 1.12,
237
- "learning_rate": 0.0001551155115511551,
238
- "loss": 0.3017,
239
- "step": 340
240
- },
241
- {
242
- "epoch": 1.16,
243
- "learning_rate": 0.0001537953795379538,
244
- "loss": 0.2288,
245
- "step": 350
246
- },
247
- {
248
- "epoch": 1.19,
249
- "learning_rate": 0.0001524752475247525,
250
- "loss": 0.1533,
251
- "step": 360
252
- },
253
- {
254
- "epoch": 1.22,
255
- "learning_rate": 0.00015115511551155118,
256
- "loss": 0.3527,
257
- "step": 370
258
- },
259
- {
260
- "epoch": 1.25,
261
- "learning_rate": 0.00014983498349834985,
262
- "loss": 0.1966,
263
- "step": 380
264
- },
265
- {
266
- "epoch": 1.29,
267
- "learning_rate": 0.0001485148514851485,
268
- "loss": 0.1962,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 1.32,
273
- "learning_rate": 0.0001471947194719472,
274
- "loss": 0.2837,
275
- "step": 400
276
- },
277
- {
278
- "epoch": 1.32,
279
- "eval_accuracy": 0.9026178010471204,
280
- "eval_loss": 0.40083953738212585,
281
- "eval_runtime": 14.9253,
282
- "eval_samples_per_second": 63.985,
283
- "eval_steps_per_second": 8.04,
284
- "step": 400
285
- },
286
- {
287
- "epoch": 1.35,
288
- "learning_rate": 0.00014587458745874587,
289
- "loss": 0.1951,
290
- "step": 410
291
- },
292
- {
293
- "epoch": 1.39,
294
- "learning_rate": 0.00014455445544554456,
295
- "loss": 0.2244,
296
- "step": 420
297
- },
298
- {
299
- "epoch": 1.42,
300
- "learning_rate": 0.00014323432343234325,
301
- "loss": 0.144,
302
- "step": 430
303
- },
304
- {
305
- "epoch": 1.45,
306
- "learning_rate": 0.00014191419141914194,
307
- "loss": 0.1707,
308
- "step": 440
309
- },
310
- {
311
- "epoch": 1.49,
312
- "learning_rate": 0.0001405940594059406,
313
- "loss": 0.1881,
314
- "step": 450
315
- },
316
- {
317
- "epoch": 1.52,
318
- "learning_rate": 0.00013927392739273927,
319
- "loss": 0.2441,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 1.55,
324
- "learning_rate": 0.00013795379537953796,
325
- "loss": 0.2128,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 1.58,
330
- "learning_rate": 0.00013663366336633665,
331
- "loss": 0.2773,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 1.62,
336
- "learning_rate": 0.00013531353135313532,
337
- "loss": 0.2554,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 1.65,
342
- "learning_rate": 0.000133993399339934,
343
- "loss": 0.1645,
344
- "step": 500
345
- },
346
- {
347
- "epoch": 1.65,
348
- "eval_accuracy": 0.9057591623036649,
349
- "eval_loss": 0.3701848089694977,
350
- "eval_runtime": 14.4771,
351
- "eval_samples_per_second": 65.966,
352
- "eval_steps_per_second": 8.289,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 1.68,
357
- "learning_rate": 0.0001326732673267327,
358
- "loss": 0.2479,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 1.72,
363
- "learning_rate": 0.00013135313531353136,
364
- "loss": 0.1427,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 1.75,
369
- "learning_rate": 0.00013003300330033003,
370
- "loss": 0.234,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 1.78,
375
- "learning_rate": 0.00012871287128712872,
376
- "loss": 0.2,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 1.82,
381
- "learning_rate": 0.0001273927392739274,
382
- "loss": 0.3339,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 1.85,
387
- "learning_rate": 0.00012607260726072607,
388
- "loss": 0.1089,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 1.88,
393
- "learning_rate": 0.00012475247524752477,
394
- "loss": 0.2647,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 1.91,
399
- "learning_rate": 0.00012343234323432346,
400
- "loss": 0.1648,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 1.95,
405
- "learning_rate": 0.00012211221122112212,
406
- "loss": 0.1728,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 1.98,
411
- "learning_rate": 0.0001207920792079208,
412
- "loss": 0.1604,
413
- "step": 600
414
- },
415
- {
416
- "epoch": 1.98,
417
- "eval_accuracy": 0.8921465968586387,
418
- "eval_loss": 0.3981299102306366,
419
- "eval_runtime": 14.3841,
420
- "eval_samples_per_second": 66.393,
421
- "eval_steps_per_second": 8.343,
422
- "step": 600
423
- },
424
- {
425
- "epoch": 2.01,
426
- "learning_rate": 0.00011947194719471948,
427
- "loss": 0.0877,
428
- "step": 610
429
- },
430
- {
431
- "epoch": 2.05,
432
- "learning_rate": 0.00011815181518151817,
433
- "loss": 0.119,
434
- "step": 620
435
- },
436
- {
437
- "epoch": 2.08,
438
- "learning_rate": 0.00011683168316831683,
439
- "loss": 0.0758,
440
- "step": 630
441
- },
442
- {
443
- "epoch": 2.11,
444
- "learning_rate": 0.00011551155115511551,
445
- "loss": 0.1496,
446
- "step": 640
447
- },
448
- {
449
- "epoch": 2.15,
450
- "learning_rate": 0.0001141914191419142,
451
- "loss": 0.0617,
452
- "step": 650
453
- },
454
- {
455
- "epoch": 2.18,
456
- "learning_rate": 0.00011287128712871287,
457
- "loss": 0.0627,
458
- "step": 660
459
- },
460
- {
461
- "epoch": 2.21,
462
- "learning_rate": 0.00011155115511551156,
463
- "loss": 0.0948,
464
- "step": 670
465
- },
466
- {
467
- "epoch": 2.24,
468
- "learning_rate": 0.00011023102310231023,
469
- "loss": 0.0987,
470
- "step": 680
471
- },
472
- {
473
- "epoch": 2.28,
474
- "learning_rate": 0.00010891089108910893,
475
- "loss": 0.0473,
476
- "step": 690
477
- },
478
- {
479
- "epoch": 2.31,
480
- "learning_rate": 0.00010759075907590759,
481
- "loss": 0.0498,
482
- "step": 700
483
- },
484
- {
485
- "epoch": 2.31,
486
- "eval_accuracy": 0.9204188481675393,
487
- "eval_loss": 0.31845271587371826,
488
- "eval_runtime": 14.4624,
489
- "eval_samples_per_second": 66.033,
490
- "eval_steps_per_second": 8.297,
491
- "step": 700
492
- },
493
- {
494
- "epoch": 2.34,
495
- "learning_rate": 0.00010627062706270627,
496
- "loss": 0.0428,
497
- "step": 710
498
- },
499
- {
500
- "epoch": 2.38,
501
- "learning_rate": 0.00010495049504950496,
502
- "loss": 0.0848,
503
- "step": 720
504
- },
505
- {
506
- "epoch": 2.41,
507
- "learning_rate": 0.00010363036303630365,
508
- "loss": 0.0512,
509
- "step": 730
510
- },
511
- {
512
- "epoch": 2.44,
513
- "learning_rate": 0.00010231023102310232,
514
- "loss": 0.043,
515
- "step": 740
516
- },
517
- {
518
- "epoch": 2.48,
519
- "learning_rate": 0.00010099009900990099,
520
- "loss": 0.0372,
521
- "step": 750
522
- },
523
- {
524
- "epoch": 2.51,
525
- "learning_rate": 9.966996699669967e-05,
526
- "loss": 0.1442,
527
- "step": 760
528
- },
529
- {
530
- "epoch": 2.54,
531
- "learning_rate": 9.834983498349836e-05,
532
- "loss": 0.0426,
533
- "step": 770
534
- },
535
- {
536
- "epoch": 2.57,
537
- "learning_rate": 9.702970297029703e-05,
538
- "loss": 0.0511,
539
- "step": 780
540
- },
541
- {
542
- "epoch": 2.61,
543
- "learning_rate": 9.570957095709572e-05,
544
- "loss": 0.0634,
545
- "step": 790
546
- },
547
- {
548
- "epoch": 2.64,
549
- "learning_rate": 9.43894389438944e-05,
550
- "loss": 0.0406,
551
- "step": 800
552
- },
553
- {
554
- "epoch": 2.64,
555
- "eval_accuracy": 0.9141361256544502,
556
- "eval_loss": 0.3426617980003357,
557
- "eval_runtime": 14.6543,
558
- "eval_samples_per_second": 65.168,
559
- "eval_steps_per_second": 8.189,
560
- "step": 800
561
- },
562
- {
563
- "epoch": 2.67,
564
- "learning_rate": 9.306930693069307e-05,
565
- "loss": 0.1104,
566
- "step": 810
567
- },
568
- {
569
- "epoch": 2.71,
570
- "learning_rate": 9.174917491749175e-05,
571
- "loss": 0.0778,
572
- "step": 820
573
- },
574
- {
575
- "epoch": 2.74,
576
- "learning_rate": 9.042904290429043e-05,
577
- "loss": 0.1189,
578
- "step": 830
579
- },
580
- {
581
- "epoch": 2.77,
582
- "learning_rate": 8.910891089108912e-05,
583
- "loss": 0.0873,
584
- "step": 840
585
- },
586
- {
587
- "epoch": 2.81,
588
- "learning_rate": 8.778877887788778e-05,
589
- "loss": 0.0414,
590
- "step": 850
591
- },
592
- {
593
- "epoch": 2.84,
594
- "learning_rate": 8.646864686468648e-05,
595
- "loss": 0.0723,
596
- "step": 860
597
- },
598
- {
599
- "epoch": 2.87,
600
- "learning_rate": 8.514851485148515e-05,
601
- "loss": 0.0495,
602
- "step": 870
603
- },
604
- {
605
- "epoch": 2.9,
606
- "learning_rate": 8.382838283828383e-05,
607
- "loss": 0.0698,
608
- "step": 880
609
- },
610
- {
611
- "epoch": 2.94,
612
- "learning_rate": 8.250825082508251e-05,
613
- "loss": 0.0636,
614
- "step": 890
615
- },
616
- {
617
- "epoch": 2.97,
618
- "learning_rate": 8.11881188118812e-05,
619
- "loss": 0.1049,
620
- "step": 900
621
- },
622
- {
623
- "epoch": 2.97,
624
- "eval_accuracy": 0.9172774869109948,
625
- "eval_loss": 0.344361275434494,
626
- "eval_runtime": 14.443,
627
- "eval_samples_per_second": 66.122,
628
- "eval_steps_per_second": 8.309,
629
- "step": 900
630
- },
631
- {
632
- "epoch": 3.0,
633
- "learning_rate": 7.986798679867988e-05,
634
- "loss": 0.0148,
635
- "step": 910
636
- },
637
- {
638
- "epoch": 3.04,
639
- "learning_rate": 7.854785478547854e-05,
640
- "loss": 0.0134,
641
- "step": 920
642
- },
643
- {
644
- "epoch": 3.07,
645
- "learning_rate": 7.722772277227723e-05,
646
- "loss": 0.0263,
647
- "step": 930
648
- },
649
- {
650
- "epoch": 3.1,
651
- "learning_rate": 7.590759075907591e-05,
652
- "loss": 0.0453,
653
- "step": 940
654
- },
655
- {
656
- "epoch": 3.14,
657
- "learning_rate": 7.458745874587459e-05,
658
- "loss": 0.0156,
659
- "step": 950
660
- },
661
- {
662
- "epoch": 3.17,
663
- "learning_rate": 7.326732673267327e-05,
664
- "loss": 0.0125,
665
- "step": 960
666
- },
667
- {
668
- "epoch": 3.2,
669
- "learning_rate": 7.194719471947196e-05,
670
- "loss": 0.0118,
671
- "step": 970
672
- },
673
- {
674
- "epoch": 3.23,
675
- "learning_rate": 7.062706270627064e-05,
676
- "loss": 0.0116,
677
- "step": 980
678
- },
679
- {
680
- "epoch": 3.27,
681
- "learning_rate": 6.93069306930693e-05,
682
- "loss": 0.011,
683
- "step": 990
684
- },
685
- {
686
- "epoch": 3.3,
687
- "learning_rate": 6.798679867986799e-05,
688
- "loss": 0.0272,
689
- "step": 1000
690
- },
691
- {
692
- "epoch": 3.3,
693
- "eval_accuracy": 0.9246073298429319,
694
- "eval_loss": 0.31677740812301636,
695
- "eval_runtime": 14.7709,
696
- "eval_samples_per_second": 64.654,
697
- "eval_steps_per_second": 8.124,
698
- "step": 1000
699
- },
700
- {
701
- "epoch": 3.33,
702
- "learning_rate": 6.666666666666667e-05,
703
- "loss": 0.0113,
704
- "step": 1010
705
- },
706
- {
707
- "epoch": 3.37,
708
- "learning_rate": 6.534653465346535e-05,
709
- "loss": 0.0287,
710
- "step": 1020
711
- },
712
- {
713
- "epoch": 3.4,
714
- "learning_rate": 6.402640264026403e-05,
715
- "loss": 0.0326,
716
- "step": 1030
717
- },
718
- {
719
- "epoch": 3.43,
720
- "learning_rate": 6.270627062706272e-05,
721
- "loss": 0.0157,
722
- "step": 1040
723
- },
724
- {
725
- "epoch": 3.47,
726
- "learning_rate": 6.13861386138614e-05,
727
- "loss": 0.0135,
728
- "step": 1050
729
- },
730
- {
731
- "epoch": 3.5,
732
- "learning_rate": 6.006600660066007e-05,
733
- "loss": 0.0154,
734
- "step": 1060
735
- },
736
- {
737
- "epoch": 3.53,
738
- "learning_rate": 5.874587458745875e-05,
739
- "loss": 0.0357,
740
- "step": 1070
741
- },
742
- {
743
- "epoch": 3.56,
744
- "learning_rate": 5.742574257425742e-05,
745
- "loss": 0.0161,
746
- "step": 1080
747
- },
748
- {
749
- "epoch": 3.6,
750
- "learning_rate": 5.610561056105611e-05,
751
- "loss": 0.04,
752
- "step": 1090
753
- },
754
- {
755
- "epoch": 3.63,
756
- "learning_rate": 5.4785478547854784e-05,
757
- "loss": 0.0186,
758
- "step": 1100
759
- },
760
- {
761
- "epoch": 3.63,
762
- "eval_accuracy": 0.9287958115183246,
763
- "eval_loss": 0.3141845762729645,
764
- "eval_runtime": 14.3909,
765
- "eval_samples_per_second": 66.361,
766
- "eval_steps_per_second": 8.339,
767
- "step": 1100
768
- },
769
- {
770
- "epoch": 3.66,
771
- "learning_rate": 5.346534653465347e-05,
772
- "loss": 0.0119,
773
- "step": 1110
774
- },
775
- {
776
- "epoch": 3.7,
777
- "learning_rate": 5.2145214521452146e-05,
778
- "loss": 0.0089,
779
- "step": 1120
780
- },
781
- {
782
- "epoch": 3.73,
783
- "learning_rate": 5.082508250825083e-05,
784
- "loss": 0.0097,
785
- "step": 1130
786
- },
787
- {
788
- "epoch": 3.76,
789
- "learning_rate": 4.950495049504951e-05,
790
- "loss": 0.0561,
791
- "step": 1140
792
- },
793
- {
794
- "epoch": 3.8,
795
- "learning_rate": 4.8184818481848186e-05,
796
- "loss": 0.0457,
797
- "step": 1150
798
- },
799
- {
800
- "epoch": 3.83,
801
- "learning_rate": 4.686468646864687e-05,
802
- "loss": 0.0129,
803
- "step": 1160
804
- },
805
- {
806
- "epoch": 3.86,
807
- "learning_rate": 4.554455445544555e-05,
808
- "loss": 0.0657,
809
- "step": 1170
810
- },
811
- {
812
- "epoch": 3.89,
813
- "learning_rate": 4.4224422442244226e-05,
814
- "loss": 0.0081,
815
- "step": 1180
816
- },
817
- {
818
- "epoch": 3.93,
819
- "learning_rate": 4.2904290429042904e-05,
820
- "loss": 0.0084,
821
- "step": 1190
822
- },
823
- {
824
- "epoch": 3.96,
825
- "learning_rate": 4.158415841584158e-05,
826
- "loss": 0.0203,
827
- "step": 1200
828
- },
829
- {
830
- "epoch": 3.96,
831
- "eval_accuracy": 0.9298429319371728,
832
- "eval_loss": 0.29309454560279846,
833
- "eval_runtime": 14.4376,
834
- "eval_samples_per_second": 66.147,
835
- "eval_steps_per_second": 8.312,
836
- "step": 1200
837
- },
838
- {
839
- "epoch": 3.99,
840
- "learning_rate": 4.0264026402640266e-05,
841
- "loss": 0.0459,
842
- "step": 1210
843
- },
844
- {
845
- "epoch": 4.03,
846
- "learning_rate": 3.8943894389438944e-05,
847
- "loss": 0.008,
848
- "step": 1220
849
- },
850
- {
851
- "epoch": 4.06,
852
- "learning_rate": 3.762376237623763e-05,
853
- "loss": 0.0078,
854
- "step": 1230
855
- },
856
- {
857
- "epoch": 4.09,
858
- "learning_rate": 3.6303630363036307e-05,
859
- "loss": 0.0079,
860
- "step": 1240
861
- },
862
- {
863
- "epoch": 4.13,
864
- "learning_rate": 3.4983498349834984e-05,
865
- "loss": 0.0087,
866
- "step": 1250
867
- },
868
- {
869
- "epoch": 4.16,
870
- "learning_rate": 3.366336633663367e-05,
871
- "loss": 0.008,
872
- "step": 1260
873
- },
874
- {
875
- "epoch": 4.19,
876
- "learning_rate": 3.234323432343234e-05,
877
- "loss": 0.0074,
878
- "step": 1270
879
- },
880
- {
881
- "epoch": 4.22,
882
- "learning_rate": 3.1023102310231024e-05,
883
- "loss": 0.0078,
884
- "step": 1280
885
- },
886
- {
887
- "epoch": 4.26,
888
- "learning_rate": 2.9702970297029702e-05,
889
- "loss": 0.0076,
890
- "step": 1290
891
- },
892
- {
893
- "epoch": 4.29,
894
- "learning_rate": 2.8382838283828383e-05,
895
- "loss": 0.007,
896
- "step": 1300
897
- },
898
- {
899
- "epoch": 4.29,
900
- "eval_accuracy": 0.9392670157068063,
901
- "eval_loss": 0.27537932991981506,
902
- "eval_runtime": 14.7619,
903
- "eval_samples_per_second": 64.694,
904
- "eval_steps_per_second": 8.129,
905
- "step": 1300
906
- },
907
- {
908
- "epoch": 4.32,
909
- "learning_rate": 2.7062706270627065e-05,
910
- "loss": 0.0073,
911
- "step": 1310
912
- },
913
- {
914
- "epoch": 4.36,
915
- "learning_rate": 2.5742574257425746e-05,
916
- "loss": 0.007,
917
- "step": 1320
918
- },
919
- {
920
- "epoch": 4.39,
921
- "learning_rate": 2.4422442244224424e-05,
922
- "loss": 0.0071,
923
- "step": 1330
924
- },
925
- {
926
- "epoch": 4.42,
927
- "learning_rate": 2.31023102310231e-05,
928
- "loss": 0.0068,
929
- "step": 1340
930
- },
931
- {
932
- "epoch": 4.46,
933
- "learning_rate": 2.1782178217821783e-05,
934
- "loss": 0.007,
935
- "step": 1350
936
- },
937
- {
938
- "epoch": 4.49,
939
- "learning_rate": 2.0462046204620464e-05,
940
- "loss": 0.0068,
941
- "step": 1360
942
- },
943
- {
944
- "epoch": 4.52,
945
- "learning_rate": 1.9141914191419145e-05,
946
- "loss": 0.0083,
947
- "step": 1370
948
- },
949
- {
950
- "epoch": 4.55,
951
- "learning_rate": 1.7821782178217823e-05,
952
- "loss": 0.0167,
953
- "step": 1380
954
- },
955
- {
956
- "epoch": 4.59,
957
- "learning_rate": 1.6501650165016504e-05,
958
- "loss": 0.0069,
959
- "step": 1390
960
- },
961
- {
962
- "epoch": 4.62,
963
- "learning_rate": 1.5181518151815183e-05,
964
- "loss": 0.0072,
965
- "step": 1400
966
- },
967
- {
968
- "epoch": 4.62,
969
- "eval_accuracy": 0.9403141361256544,
970
- "eval_loss": 0.27776801586151123,
971
- "eval_runtime": 14.4524,
972
- "eval_samples_per_second": 66.079,
973
- "eval_steps_per_second": 8.303,
974
- "step": 1400
975
- },
976
- {
977
- "epoch": 4.65,
978
- "learning_rate": 1.3861386138613863e-05,
979
- "loss": 0.0069,
980
- "step": 1410
981
- },
982
- {
983
- "epoch": 4.69,
984
- "learning_rate": 1.254125412541254e-05,
985
- "loss": 0.0067,
986
- "step": 1420
987
- },
988
- {
989
- "epoch": 4.72,
990
- "learning_rate": 1.1221122112211222e-05,
991
- "loss": 0.0066,
992
- "step": 1430
993
- },
994
- {
995
- "epoch": 4.75,
996
- "learning_rate": 9.900990099009901e-06,
997
- "loss": 0.0065,
998
- "step": 1440
999
- },
1000
- {
1001
- "epoch": 4.79,
1002
- "learning_rate": 8.58085808580858e-06,
1003
- "loss": 0.0342,
1004
- "step": 1450
1005
- },
1006
- {
1007
- "epoch": 4.82,
1008
- "learning_rate": 7.260726072607261e-06,
1009
- "loss": 0.0068,
1010
- "step": 1460
1011
- },
1012
- {
1013
- "epoch": 4.85,
1014
- "learning_rate": 5.940594059405941e-06,
1015
- "loss": 0.0075,
1016
- "step": 1470
1017
- },
1018
- {
1019
- "epoch": 4.88,
1020
- "learning_rate": 4.62046204620462e-06,
1021
- "loss": 0.0067,
1022
- "step": 1480
1023
- },
1024
- {
1025
- "epoch": 4.92,
1026
- "learning_rate": 3.3003300330033e-06,
1027
- "loss": 0.0066,
1028
- "step": 1490
1029
- },
1030
- {
1031
- "epoch": 4.95,
1032
- "learning_rate": 1.9801980198019803e-06,
1033
- "loss": 0.0073,
1034
- "step": 1500
1035
- },
1036
- {
1037
- "epoch": 4.95,
1038
- "eval_accuracy": 0.9392670157068063,
1039
- "eval_loss": 0.2782096564769745,
1040
- "eval_runtime": 14.5029,
1041
- "eval_samples_per_second": 65.849,
1042
- "eval_steps_per_second": 8.274,
1043
- "step": 1500
1044
- },
1045
- {
1046
- "epoch": 4.98,
1047
- "learning_rate": 6.600660066006602e-07,
1048
- "loss": 0.0408,
1049
- "step": 1510
1050
- },
1051
- {
1052
- "epoch": 5.0,
1053
- "step": 1515,
1054
- "total_flos": 1.874833643725701e+18,
1055
- "train_loss": 0.2613857026712926,
1056
- "train_runtime": 1218.6852,
1057
- "train_samples_per_second": 19.849,
1058
- "train_steps_per_second": 1.243
1059
- }
1060
- ],
1061
- "max_steps": 1515,
1062
- "num_train_epochs": 5,
1063
- "total_flos": 1.874833643725701e+18,
1064
- "trial_name": null,
1065
- "trial_params": null
1066
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e607a6a3dd02c81519e8dfc776c04e62de25dd6c3a2efe8b652e9a392739d8b2
3
- size 3247