khaled44 committed on
Commit
96d2f22
1 Parent(s): cec8abd

End of training

Browse files
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: google/vit-large-patch16-224-in21k
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ model-index:
9
+ - name: vit-large-beans-demo-v5
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # vit-large-beans-demo-v5
17
+
18
+ This model is a fine-tuned version of [google/vit-large-patch16-224-in21k](https://huggingface.co/google/vit-large-patch16-224-in21k) on an unknown dataset.
19
+ It achieves the following results on the evaluation set (measured at step 1000, epoch 1.25):
20
+ - Loss: 0.6497
21
+ - Accuracy: 0.7335
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 0.0001
41
+ - train_batch_size: 35
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - num_epochs: 2
47
+ - mixed_precision_training: Native AMP
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | 0.581 | 1.25 | 1000 | 0.6497 | 0.7335 |
54
+
55
+
56
+ ### Framework versions
57
+
58
+ - Transformers 4.35.2
59
+ - PyTorch 2.1.2+cu121
60
+ - Datasets 2.15.0
61
+ - Tokenizers 0.15.0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 1.5287325828267258e+19,
4
+ "train_loss": 0.5975370544538761,
5
+ "train_runtime": 3758.5978,
6
+ "train_samples_per_second": 14.848,
7
+ "train_steps_per_second": 0.425
8
+ }
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-large-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 1024,
11
+ "id2label": {
12
+ "0": "DR",
13
+ "1": "G",
14
+ "2": "ND",
15
+ "3": "WD",
16
+ "4": "other"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 4096,
21
+ "label2id": {
22
+ "DR": "0",
23
+ "G": "1",
24
+ "ND": "2",
25
+ "WD": "3",
26
+ "other": "4"
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 16,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 24,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.35.2"
38
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6dd0a041f3cce26a7c61135e2e7d9dbc4f811319406622ede04752e7946046a
3
+ size 1213273572
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
runs/Dec22_06-31-25_f46503261215/events.out.tfevents.1703226686.f46503261215.344.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a47b5b732b0814bf8cec70859146726ff5a1b8de848b50f5ed288cc3b76ff9
3
+ size 5081
runs/Dec22_06-33-31_f46503261215/events.out.tfevents.1703226811.f46503261215.344.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a920e8f8da86e22b6fe42b15546c8bd72531c54d1c2f8c72174f1f67be0b3a
3
+ size 4184
runs/Dec22_06-34-39_f46503261215/events.out.tfevents.1703226879.f46503261215.344.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:774c53652032a4d98ac69aa090b4166174199e9bbe8339eadec6620fadf96ebe
3
+ size 4184
runs/Dec22_06-36-17_f46503261215/events.out.tfevents.1703226980.f46503261215.22573.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b88c97c591605eb5ec315b05174451be4188669bb350520402789008cdc54a53
3
+ size 29914
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 1.5287325828267258e+19,
4
+ "train_loss": 0.5975370544538761,
5
+ "train_runtime": 3758.5978,
6
+ "train_samples_per_second": 14.848,
7
+ "train_steps_per_second": 0.425
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,991 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6497328281402588,
3
+ "best_model_checkpoint": "./vit-large-beans-demo-v5/checkpoint-1000",
4
+ "epoch": 2.0,
5
+ "eval_steps": 1000,
6
+ "global_step": 1596,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "learning_rate": 9.937343358395991e-05,
14
+ "loss": 1.229,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.03,
19
+ "learning_rate": 9.87468671679198e-05,
20
+ "loss": 1.0071,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.04,
25
+ "learning_rate": 9.81203007518797e-05,
26
+ "loss": 0.9902,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.05,
31
+ "learning_rate": 9.749373433583961e-05,
32
+ "loss": 0.9331,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.06,
37
+ "learning_rate": 9.68671679197995e-05,
38
+ "loss": 0.9112,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.08,
43
+ "learning_rate": 9.62406015037594e-05,
44
+ "loss": 0.904,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.09,
49
+ "learning_rate": 9.56140350877193e-05,
50
+ "loss": 0.8937,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.1,
55
+ "learning_rate": 9.49874686716792e-05,
56
+ "loss": 0.8887,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.11,
61
+ "learning_rate": 9.436090225563911e-05,
62
+ "loss": 0.8668,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.13,
67
+ "learning_rate": 9.373433583959899e-05,
68
+ "loss": 0.8551,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.14,
73
+ "learning_rate": 9.31077694235589e-05,
74
+ "loss": 0.8759,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.15,
79
+ "learning_rate": 9.24812030075188e-05,
80
+ "loss": 0.8171,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.16,
85
+ "learning_rate": 9.18546365914787e-05,
86
+ "loss": 0.8854,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.18,
91
+ "learning_rate": 9.12280701754386e-05,
92
+ "loss": 0.8901,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 0.19,
97
+ "learning_rate": 9.06015037593985e-05,
98
+ "loss": 0.7488,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 0.2,
103
+ "learning_rate": 8.99749373433584e-05,
104
+ "loss": 0.8285,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 0.21,
109
+ "learning_rate": 8.93483709273183e-05,
110
+ "loss": 0.886,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 0.23,
115
+ "learning_rate": 8.87218045112782e-05,
116
+ "loss": 0.8396,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 0.24,
121
+ "learning_rate": 8.80952380952381e-05,
122
+ "loss": 0.802,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 0.25,
127
+ "learning_rate": 8.7468671679198e-05,
128
+ "loss": 0.8519,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 0.26,
133
+ "learning_rate": 8.68421052631579e-05,
134
+ "loss": 0.8145,
135
+ "step": 210
136
+ },
137
+ {
138
+ "epoch": 0.28,
139
+ "learning_rate": 8.621553884711779e-05,
140
+ "loss": 0.8867,
141
+ "step": 220
142
+ },
143
+ {
144
+ "epoch": 0.29,
145
+ "learning_rate": 8.55889724310777e-05,
146
+ "loss": 0.8109,
147
+ "step": 230
148
+ },
149
+ {
150
+ "epoch": 0.3,
151
+ "learning_rate": 8.49624060150376e-05,
152
+ "loss": 0.8546,
153
+ "step": 240
154
+ },
155
+ {
156
+ "epoch": 0.31,
157
+ "learning_rate": 8.43358395989975e-05,
158
+ "loss": 0.7561,
159
+ "step": 250
160
+ },
161
+ {
162
+ "epoch": 0.33,
163
+ "learning_rate": 8.37092731829574e-05,
164
+ "loss": 0.776,
165
+ "step": 260
166
+ },
167
+ {
168
+ "epoch": 0.34,
169
+ "learning_rate": 8.308270676691729e-05,
170
+ "loss": 0.7976,
171
+ "step": 270
172
+ },
173
+ {
174
+ "epoch": 0.35,
175
+ "learning_rate": 8.24561403508772e-05,
176
+ "loss": 0.8036,
177
+ "step": 280
178
+ },
179
+ {
180
+ "epoch": 0.36,
181
+ "learning_rate": 8.182957393483711e-05,
182
+ "loss": 0.7369,
183
+ "step": 290
184
+ },
185
+ {
186
+ "epoch": 0.38,
187
+ "learning_rate": 8.120300751879699e-05,
188
+ "loss": 0.881,
189
+ "step": 300
190
+ },
191
+ {
192
+ "epoch": 0.39,
193
+ "learning_rate": 8.05764411027569e-05,
194
+ "loss": 0.683,
195
+ "step": 310
196
+ },
197
+ {
198
+ "epoch": 0.4,
199
+ "learning_rate": 7.994987468671679e-05,
200
+ "loss": 0.7664,
201
+ "step": 320
202
+ },
203
+ {
204
+ "epoch": 0.41,
205
+ "learning_rate": 7.93233082706767e-05,
206
+ "loss": 0.7325,
207
+ "step": 330
208
+ },
209
+ {
210
+ "epoch": 0.43,
211
+ "learning_rate": 7.869674185463659e-05,
212
+ "loss": 0.8024,
213
+ "step": 340
214
+ },
215
+ {
216
+ "epoch": 0.44,
217
+ "learning_rate": 7.807017543859649e-05,
218
+ "loss": 0.7889,
219
+ "step": 350
220
+ },
221
+ {
222
+ "epoch": 0.45,
223
+ "learning_rate": 7.74436090225564e-05,
224
+ "loss": 0.7871,
225
+ "step": 360
226
+ },
227
+ {
228
+ "epoch": 0.46,
229
+ "learning_rate": 7.68170426065163e-05,
230
+ "loss": 0.7085,
231
+ "step": 370
232
+ },
233
+ {
234
+ "epoch": 0.48,
235
+ "learning_rate": 7.619047619047618e-05,
236
+ "loss": 0.8221,
237
+ "step": 380
238
+ },
239
+ {
240
+ "epoch": 0.49,
241
+ "learning_rate": 7.556390977443609e-05,
242
+ "loss": 0.8302,
243
+ "step": 390
244
+ },
245
+ {
246
+ "epoch": 0.5,
247
+ "learning_rate": 7.4937343358396e-05,
248
+ "loss": 0.6974,
249
+ "step": 400
250
+ },
251
+ {
252
+ "epoch": 0.51,
253
+ "learning_rate": 7.43107769423559e-05,
254
+ "loss": 0.6954,
255
+ "step": 410
256
+ },
257
+ {
258
+ "epoch": 0.53,
259
+ "learning_rate": 7.368421052631579e-05,
260
+ "loss": 0.7266,
261
+ "step": 420
262
+ },
263
+ {
264
+ "epoch": 0.54,
265
+ "learning_rate": 7.30576441102757e-05,
266
+ "loss": 0.7308,
267
+ "step": 430
268
+ },
269
+ {
270
+ "epoch": 0.55,
271
+ "learning_rate": 7.243107769423559e-05,
272
+ "loss": 0.7764,
273
+ "step": 440
274
+ },
275
+ {
276
+ "epoch": 0.56,
277
+ "learning_rate": 7.18045112781955e-05,
278
+ "loss": 0.7621,
279
+ "step": 450
280
+ },
281
+ {
282
+ "epoch": 0.58,
283
+ "learning_rate": 7.11779448621554e-05,
284
+ "loss": 0.7511,
285
+ "step": 460
286
+ },
287
+ {
288
+ "epoch": 0.59,
289
+ "learning_rate": 7.055137844611529e-05,
290
+ "loss": 0.6622,
291
+ "step": 470
292
+ },
293
+ {
294
+ "epoch": 0.6,
295
+ "learning_rate": 6.99248120300752e-05,
296
+ "loss": 0.7743,
297
+ "step": 480
298
+ },
299
+ {
300
+ "epoch": 0.61,
301
+ "learning_rate": 6.929824561403509e-05,
302
+ "loss": 0.7547,
303
+ "step": 490
304
+ },
305
+ {
306
+ "epoch": 0.63,
307
+ "learning_rate": 6.867167919799499e-05,
308
+ "loss": 0.758,
309
+ "step": 500
310
+ },
311
+ {
312
+ "epoch": 0.64,
313
+ "learning_rate": 6.80451127819549e-05,
314
+ "loss": 0.7378,
315
+ "step": 510
316
+ },
317
+ {
318
+ "epoch": 0.65,
319
+ "learning_rate": 6.741854636591479e-05,
320
+ "loss": 0.7583,
321
+ "step": 520
322
+ },
323
+ {
324
+ "epoch": 0.66,
325
+ "learning_rate": 6.67919799498747e-05,
326
+ "loss": 0.7618,
327
+ "step": 530
328
+ },
329
+ {
330
+ "epoch": 0.68,
331
+ "learning_rate": 6.616541353383459e-05,
332
+ "loss": 0.6799,
333
+ "step": 540
334
+ },
335
+ {
336
+ "epoch": 0.69,
337
+ "learning_rate": 6.553884711779449e-05,
338
+ "loss": 0.6106,
339
+ "step": 550
340
+ },
341
+ {
342
+ "epoch": 0.7,
343
+ "learning_rate": 6.49122807017544e-05,
344
+ "loss": 0.7618,
345
+ "step": 560
346
+ },
347
+ {
348
+ "epoch": 0.71,
349
+ "learning_rate": 6.428571428571429e-05,
350
+ "loss": 0.6979,
351
+ "step": 570
352
+ },
353
+ {
354
+ "epoch": 0.73,
355
+ "learning_rate": 6.365914786967418e-05,
356
+ "loss": 0.6811,
357
+ "step": 580
358
+ },
359
+ {
360
+ "epoch": 0.74,
361
+ "learning_rate": 6.303258145363409e-05,
362
+ "loss": 0.7091,
363
+ "step": 590
364
+ },
365
+ {
366
+ "epoch": 0.75,
367
+ "learning_rate": 6.240601503759398e-05,
368
+ "loss": 0.7377,
369
+ "step": 600
370
+ },
371
+ {
372
+ "epoch": 0.76,
373
+ "learning_rate": 6.177944862155389e-05,
374
+ "loss": 0.6556,
375
+ "step": 610
376
+ },
377
+ {
378
+ "epoch": 0.78,
379
+ "learning_rate": 6.115288220551379e-05,
380
+ "loss": 0.6782,
381
+ "step": 620
382
+ },
383
+ {
384
+ "epoch": 0.79,
385
+ "learning_rate": 6.052631578947369e-05,
386
+ "loss": 0.7018,
387
+ "step": 630
388
+ },
389
+ {
390
+ "epoch": 0.8,
391
+ "learning_rate": 5.989974937343359e-05,
392
+ "loss": 0.7544,
393
+ "step": 640
394
+ },
395
+ {
396
+ "epoch": 0.81,
397
+ "learning_rate": 5.927318295739349e-05,
398
+ "loss": 0.7339,
399
+ "step": 650
400
+ },
401
+ {
402
+ "epoch": 0.83,
403
+ "learning_rate": 5.8646616541353386e-05,
404
+ "loss": 0.6871,
405
+ "step": 660
406
+ },
407
+ {
408
+ "epoch": 0.84,
409
+ "learning_rate": 5.802005012531329e-05,
410
+ "loss": 0.7215,
411
+ "step": 670
412
+ },
413
+ {
414
+ "epoch": 0.85,
415
+ "learning_rate": 5.739348370927319e-05,
416
+ "loss": 0.6803,
417
+ "step": 680
418
+ },
419
+ {
420
+ "epoch": 0.86,
421
+ "learning_rate": 5.676691729323309e-05,
422
+ "loss": 0.6658,
423
+ "step": 690
424
+ },
425
+ {
426
+ "epoch": 0.88,
427
+ "learning_rate": 5.6140350877192984e-05,
428
+ "loss": 0.5731,
429
+ "step": 700
430
+ },
431
+ {
432
+ "epoch": 0.89,
433
+ "learning_rate": 5.5513784461152885e-05,
434
+ "loss": 0.6081,
435
+ "step": 710
436
+ },
437
+ {
438
+ "epoch": 0.9,
439
+ "learning_rate": 5.4887218045112786e-05,
440
+ "loss": 0.7074,
441
+ "step": 720
442
+ },
443
+ {
444
+ "epoch": 0.91,
445
+ "learning_rate": 5.426065162907269e-05,
446
+ "loss": 0.7139,
447
+ "step": 730
448
+ },
449
+ {
450
+ "epoch": 0.93,
451
+ "learning_rate": 5.363408521303258e-05,
452
+ "loss": 0.6573,
453
+ "step": 740
454
+ },
455
+ {
456
+ "epoch": 0.94,
457
+ "learning_rate": 5.300751879699248e-05,
458
+ "loss": 0.612,
459
+ "step": 750
460
+ },
461
+ {
462
+ "epoch": 0.95,
463
+ "learning_rate": 5.2380952380952384e-05,
464
+ "loss": 0.6389,
465
+ "step": 760
466
+ },
467
+ {
468
+ "epoch": 0.96,
469
+ "learning_rate": 5.1754385964912286e-05,
470
+ "loss": 0.6942,
471
+ "step": 770
472
+ },
473
+ {
474
+ "epoch": 0.98,
475
+ "learning_rate": 5.112781954887218e-05,
476
+ "loss": 0.6322,
477
+ "step": 780
478
+ },
479
+ {
480
+ "epoch": 0.99,
481
+ "learning_rate": 5.050125313283208e-05,
482
+ "loss": 0.6333,
483
+ "step": 790
484
+ },
485
+ {
486
+ "epoch": 1.0,
487
+ "learning_rate": 4.987468671679198e-05,
488
+ "loss": 0.6327,
489
+ "step": 800
490
+ },
491
+ {
492
+ "epoch": 1.02,
493
+ "learning_rate": 4.9248120300751884e-05,
494
+ "loss": 0.5053,
495
+ "step": 810
496
+ },
497
+ {
498
+ "epoch": 1.03,
499
+ "learning_rate": 4.862155388471178e-05,
500
+ "loss": 0.4566,
501
+ "step": 820
502
+ },
503
+ {
504
+ "epoch": 1.04,
505
+ "learning_rate": 4.799498746867168e-05,
506
+ "loss": 0.503,
507
+ "step": 830
508
+ },
509
+ {
510
+ "epoch": 1.05,
511
+ "learning_rate": 4.736842105263158e-05,
512
+ "loss": 0.5174,
513
+ "step": 840
514
+ },
515
+ {
516
+ "epoch": 1.07,
517
+ "learning_rate": 4.674185463659148e-05,
518
+ "loss": 0.4601,
519
+ "step": 850
520
+ },
521
+ {
522
+ "epoch": 1.08,
523
+ "learning_rate": 4.6115288220551377e-05,
524
+ "loss": 0.5172,
525
+ "step": 860
526
+ },
527
+ {
528
+ "epoch": 1.09,
529
+ "learning_rate": 4.548872180451128e-05,
530
+ "loss": 0.4515,
531
+ "step": 870
532
+ },
533
+ {
534
+ "epoch": 1.1,
535
+ "learning_rate": 4.486215538847118e-05,
536
+ "loss": 0.4852,
537
+ "step": 880
538
+ },
539
+ {
540
+ "epoch": 1.12,
541
+ "learning_rate": 4.423558897243108e-05,
542
+ "loss": 0.5067,
543
+ "step": 890
544
+ },
545
+ {
546
+ "epoch": 1.13,
547
+ "learning_rate": 4.3609022556390975e-05,
548
+ "loss": 0.4752,
549
+ "step": 900
550
+ },
551
+ {
552
+ "epoch": 1.14,
553
+ "learning_rate": 4.298245614035088e-05,
554
+ "loss": 0.4933,
555
+ "step": 910
556
+ },
557
+ {
558
+ "epoch": 1.15,
559
+ "learning_rate": 4.235588972431078e-05,
560
+ "loss": 0.4793,
561
+ "step": 920
562
+ },
563
+ {
564
+ "epoch": 1.17,
565
+ "learning_rate": 4.172932330827068e-05,
566
+ "loss": 0.4608,
567
+ "step": 930
568
+ },
569
+ {
570
+ "epoch": 1.18,
571
+ "learning_rate": 4.110275689223057e-05,
572
+ "loss": 0.43,
573
+ "step": 940
574
+ },
575
+ {
576
+ "epoch": 1.19,
577
+ "learning_rate": 4.047619047619048e-05,
578
+ "loss": 0.4715,
579
+ "step": 950
580
+ },
581
+ {
582
+ "epoch": 1.2,
583
+ "learning_rate": 3.9849624060150376e-05,
584
+ "loss": 0.4629,
585
+ "step": 960
586
+ },
587
+ {
588
+ "epoch": 1.22,
589
+ "learning_rate": 3.922305764411028e-05,
590
+ "loss": 0.4755,
591
+ "step": 970
592
+ },
593
+ {
594
+ "epoch": 1.23,
595
+ "learning_rate": 3.859649122807018e-05,
596
+ "loss": 0.5432,
597
+ "step": 980
598
+ },
599
+ {
600
+ "epoch": 1.24,
601
+ "learning_rate": 3.796992481203008e-05,
602
+ "loss": 0.5436,
603
+ "step": 990
604
+ },
605
+ {
606
+ "epoch": 1.25,
607
+ "learning_rate": 3.7343358395989974e-05,
608
+ "loss": 0.581,
609
+ "step": 1000
610
+ },
611
+ {
612
+ "epoch": 1.25,
613
+ "eval_accuracy": 0.7335398915569326,
614
+ "eval_loss": 0.6497328281402588,
615
+ "eval_runtime": 159.466,
616
+ "eval_samples_per_second": 24.287,
617
+ "eval_steps_per_second": 3.041,
618
+ "step": 1000
619
+ },
620
+ {
621
+ "epoch": 1.27,
622
+ "learning_rate": 3.6716791979949875e-05,
623
+ "loss": 0.4371,
624
+ "step": 1010
625
+ },
626
+ {
627
+ "epoch": 1.28,
628
+ "learning_rate": 3.6090225563909776e-05,
629
+ "loss": 0.4285,
630
+ "step": 1020
631
+ },
632
+ {
633
+ "epoch": 1.29,
634
+ "learning_rate": 3.546365914786968e-05,
635
+ "loss": 0.455,
636
+ "step": 1030
637
+ },
638
+ {
639
+ "epoch": 1.3,
640
+ "learning_rate": 3.483709273182957e-05,
641
+ "loss": 0.4088,
642
+ "step": 1040
643
+ },
644
+ {
645
+ "epoch": 1.32,
646
+ "learning_rate": 3.421052631578947e-05,
647
+ "loss": 0.5105,
648
+ "step": 1050
649
+ },
650
+ {
651
+ "epoch": 1.33,
652
+ "learning_rate": 3.3583959899749374e-05,
653
+ "loss": 0.4458,
654
+ "step": 1060
655
+ },
656
+ {
657
+ "epoch": 1.34,
658
+ "learning_rate": 3.2957393483709276e-05,
659
+ "loss": 0.4961,
660
+ "step": 1070
661
+ },
662
+ {
663
+ "epoch": 1.35,
664
+ "learning_rate": 3.233082706766917e-05,
665
+ "loss": 0.5237,
666
+ "step": 1080
667
+ },
668
+ {
669
+ "epoch": 1.37,
670
+ "learning_rate": 3.170426065162908e-05,
671
+ "loss": 0.4457,
672
+ "step": 1090
673
+ },
674
+ {
675
+ "epoch": 1.38,
676
+ "learning_rate": 3.107769423558897e-05,
677
+ "loss": 0.4075,
678
+ "step": 1100
679
+ },
680
+ {
681
+ "epoch": 1.39,
682
+ "learning_rate": 3.0451127819548874e-05,
683
+ "loss": 0.477,
684
+ "step": 1110
685
+ },
686
+ {
687
+ "epoch": 1.4,
688
+ "learning_rate": 2.9824561403508772e-05,
689
+ "loss": 0.3818,
690
+ "step": 1120
691
+ },
692
+ {
693
+ "epoch": 1.42,
694
+ "learning_rate": 2.9197994987468673e-05,
695
+ "loss": 0.4805,
696
+ "step": 1130
697
+ },
698
+ {
699
+ "epoch": 1.43,
700
+ "learning_rate": 2.857142857142857e-05,
701
+ "loss": 0.4489,
702
+ "step": 1140
703
+ },
704
+ {
705
+ "epoch": 1.44,
706
+ "learning_rate": 2.7944862155388472e-05,
707
+ "loss": 0.4282,
708
+ "step": 1150
709
+ },
710
+ {
711
+ "epoch": 1.45,
712
+ "learning_rate": 2.731829573934837e-05,
713
+ "loss": 0.4145,
714
+ "step": 1160
715
+ },
716
+ {
717
+ "epoch": 1.47,
718
+ "learning_rate": 2.6691729323308275e-05,
719
+ "loss": 0.3852,
720
+ "step": 1170
721
+ },
722
+ {
723
+ "epoch": 1.48,
724
+ "learning_rate": 2.606516290726817e-05,
725
+ "loss": 0.4014,
726
+ "step": 1180
727
+ },
728
+ {
729
+ "epoch": 1.49,
730
+ "learning_rate": 2.550125313283208e-05,
731
+ "loss": 0.4444,
732
+ "step": 1190
733
+ },
734
+ {
735
+ "epoch": 1.5,
736
+ "learning_rate": 2.487468671679198e-05,
737
+ "loss": 0.4709,
738
+ "step": 1200
739
+ },
740
+ {
741
+ "epoch": 1.52,
742
+ "learning_rate": 2.424812030075188e-05,
743
+ "loss": 0.4012,
744
+ "step": 1210
745
+ },
746
+ {
747
+ "epoch": 1.53,
748
+ "learning_rate": 2.362155388471178e-05,
749
+ "loss": 0.4174,
750
+ "step": 1220
751
+ },
752
+ {
753
+ "epoch": 1.54,
754
+ "learning_rate": 2.2994987468671682e-05,
755
+ "loss": 0.4309,
756
+ "step": 1230
757
+ },
758
+ {
759
+ "epoch": 1.55,
760
+ "learning_rate": 2.236842105263158e-05,
761
+ "loss": 0.4216,
762
+ "step": 1240
763
+ },
764
+ {
765
+ "epoch": 1.57,
766
+ "learning_rate": 2.174185463659148e-05,
767
+ "loss": 0.3635,
768
+ "step": 1250
769
+ },
770
+ {
771
+ "epoch": 1.58,
772
+ "learning_rate": 2.111528822055138e-05,
773
+ "loss": 0.4032,
774
+ "step": 1260
775
+ },
776
+ {
777
+ "epoch": 1.59,
778
+ "learning_rate": 2.048872180451128e-05,
779
+ "loss": 0.3429,
780
+ "step": 1270
781
+ },
782
+ {
783
+ "epoch": 1.6,
784
+ "learning_rate": 1.9862155388471178e-05,
785
+ "loss": 0.3645,
786
+ "step": 1280
787
+ },
788
+ {
789
+ "epoch": 1.62,
790
+ "learning_rate": 1.923558897243108e-05,
791
+ "loss": 0.4039,
792
+ "step": 1290
793
+ },
794
+ {
795
+ "epoch": 1.63,
796
+ "learning_rate": 1.860902255639098e-05,
797
+ "loss": 0.393,
798
+ "step": 1300
799
+ },
800
+ {
801
+ "epoch": 1.64,
802
+ "learning_rate": 1.7982456140350878e-05,
803
+ "loss": 0.4526,
804
+ "step": 1310
805
+ },
806
+ {
807
+ "epoch": 1.65,
808
+ "learning_rate": 1.735588972431078e-05,
809
+ "loss": 0.4054,
810
+ "step": 1320
811
+ },
812
+ {
813
+ "epoch": 1.67,
814
+ "learning_rate": 1.6729323308270677e-05,
815
+ "loss": 0.3888,
816
+ "step": 1330
817
+ },
818
+ {
819
+ "epoch": 1.68,
820
+ "learning_rate": 1.610275689223058e-05,
821
+ "loss": 0.4117,
822
+ "step": 1340
823
+ },
824
+ {
825
+ "epoch": 1.69,
826
+ "learning_rate": 1.5476190476190476e-05,
827
+ "loss": 0.404,
828
+ "step": 1350
829
+ },
830
+ {
831
+ "epoch": 1.7,
832
+ "learning_rate": 1.4849624060150378e-05,
833
+ "loss": 0.434,
834
+ "step": 1360
835
+ },
836
+ {
837
+ "epoch": 1.72,
838
+ "learning_rate": 1.4223057644110277e-05,
839
+ "loss": 0.4473,
840
+ "step": 1370
841
+ },
842
+ {
843
+ "epoch": 1.73,
844
+ "learning_rate": 1.3596491228070177e-05,
845
+ "loss": 0.3732,
846
+ "step": 1380
847
+ },
848
+ {
849
+ "epoch": 1.74,
850
+ "learning_rate": 1.2969924812030076e-05,
851
+ "loss": 0.3367,
852
+ "step": 1390
853
+ },
854
+ {
855
+ "epoch": 1.75,
856
+ "learning_rate": 1.2343358395989974e-05,
857
+ "loss": 0.3256,
858
+ "step": 1400
859
+ },
860
+ {
861
+ "epoch": 1.77,
862
+ "learning_rate": 1.1716791979949874e-05,
863
+ "loss": 0.347,
864
+ "step": 1410
865
+ },
866
+ {
867
+ "epoch": 1.78,
868
+ "learning_rate": 1.1090225563909775e-05,
869
+ "loss": 0.4329,
870
+ "step": 1420
871
+ },
872
+ {
873
+ "epoch": 1.79,
874
+ "learning_rate": 1.0463659147869675e-05,
875
+ "loss": 0.348,
876
+ "step": 1430
877
+ },
878
+ {
879
+ "epoch": 1.8,
880
+ "learning_rate": 9.837092731829574e-06,
881
+ "loss": 0.3269,
882
+ "step": 1440
883
+ },
884
+ {
885
+ "epoch": 1.82,
886
+ "learning_rate": 9.210526315789474e-06,
887
+ "loss": 0.3124,
888
+ "step": 1450
889
+ },
890
+ {
891
+ "epoch": 1.83,
892
+ "learning_rate": 8.583959899749373e-06,
893
+ "loss": 0.3806,
894
+ "step": 1460
895
+ },
896
+ {
897
+ "epoch": 1.84,
898
+ "learning_rate": 7.957393483709273e-06,
899
+ "loss": 0.4165,
900
+ "step": 1470
901
+ },
902
+ {
903
+ "epoch": 1.85,
904
+ "learning_rate": 7.330827067669173e-06,
905
+ "loss": 0.3784,
906
+ "step": 1480
907
+ },
908
+ {
909
+ "epoch": 1.87,
910
+ "learning_rate": 6.704260651629073e-06,
911
+ "loss": 0.3618,
912
+ "step": 1490
913
+ },
914
+ {
915
+ "epoch": 1.88,
916
+ "learning_rate": 6.077694235588972e-06,
917
+ "loss": 0.3688,
918
+ "step": 1500
919
+ },
920
+ {
921
+ "epoch": 1.89,
922
+ "learning_rate": 5.451127819548872e-06,
923
+ "loss": 0.3775,
924
+ "step": 1510
925
+ },
926
+ {
927
+ "epoch": 1.9,
928
+ "learning_rate": 4.824561403508772e-06,
929
+ "loss": 0.3975,
930
+ "step": 1520
931
+ },
932
+ {
933
+ "epoch": 1.92,
934
+ "learning_rate": 4.197994987468672e-06,
935
+ "loss": 0.3594,
936
+ "step": 1530
937
+ },
938
+ {
939
+ "epoch": 1.93,
940
+ "learning_rate": 3.5714285714285714e-06,
941
+ "loss": 0.3771,
942
+ "step": 1540
943
+ },
944
+ {
945
+ "epoch": 1.94,
946
+ "learning_rate": 2.9448621553884713e-06,
947
+ "loss": 0.3319,
948
+ "step": 1550
949
+ },
950
+ {
951
+ "epoch": 1.95,
952
+ "learning_rate": 2.318295739348371e-06,
953
+ "loss": 0.3762,
954
+ "step": 1560
955
+ },
956
+ {
957
+ "epoch": 1.97,
958
+ "learning_rate": 1.6917293233082707e-06,
959
+ "loss": 0.3299,
960
+ "step": 1570
961
+ },
962
+ {
963
+ "epoch": 1.98,
964
+ "learning_rate": 1.0651629072681704e-06,
965
+ "loss": 0.3763,
966
+ "step": 1580
967
+ },
968
+ {
969
+ "epoch": 1.99,
970
+ "learning_rate": 4.385964912280702e-07,
971
+ "loss": 0.3268,
972
+ "step": 1590
973
+ },
974
+ {
975
+ "epoch": 2.0,
976
+ "step": 1596,
977
+ "total_flos": 1.5287325828267258e+19,
978
+ "train_loss": 0.5975370544538761,
979
+ "train_runtime": 3758.5978,
980
+ "train_samples_per_second": 14.848,
981
+ "train_steps_per_second": 0.425
982
+ }
983
+ ],
984
+ "logging_steps": 10,
985
+ "max_steps": 1596,
986
+ "num_train_epochs": 2,
987
+ "save_steps": 1000,
988
+ "total_flos": 1.5287325828267258e+19,
989
+ "trial_name": null,
990
+ "trial_params": null
991
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f4ec8c6231eb34700694a3b1d4af48ef35fccf7e478e5d37b5dab3dd63e373
3
+ size 4600