rileybol committed on
Commit
268998a
1 Parent(s): 21acf39

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - image-classification
6
+ widget:
7
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
8
+ example_title: Tiger
9
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
10
+ example_title: Teapot
11
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
12
+ example_title: Palace
13
+ ---
14
+
15
+ # Model Trained Using AutoTrain
16
+
17
+ - Problem type: Image Classification
18
+
19
+ ## Validation Metrics
20
+ loss: 0.011031342670321465
21
+
22
+ f1_macro: 1.0
23
+
24
+ f1_micro: 1.0
25
+
26
+ f1_weighted: 1.0
27
+
28
+ precision_macro: 1.0
29
+
30
+ precision_micro: 1.0
31
+
32
+ precision_weighted: 1.0
33
+
34
+ recall_macro: 1.0
35
+
36
+ recall_micro: 1.0
37
+
38
+ recall_weighted: 1.0
39
+
40
+ accuracy: 1.0
checkpoint-3839/config.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224",
3
+ "_num_labels": 83,
4
+ "architectures": [
5
+ "ViTForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "encoder_stride": 16,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.0,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Agnes Skinner",
14
+ "1": "Alec Baldwin",
15
+ "2": "Alex Whitney",
16
+ "3": "Allison Taylor",
17
+ "4": "Apu Nahasapeemapetilon",
18
+ "5": "Artie Ziff",
19
+ "6": "Ashley Grant",
20
+ "7": "Ballet Teacher",
21
+ "8": "Barney Gumble",
22
+ "9": "Bart Simpson",
23
+ "10": "Bernice Hibbert",
24
+ "11": "Carl Carlson",
25
+ "12": "Chief Wiggum",
26
+ "13": "Cletus Spuckler",
27
+ "14": "Comic Book Guy",
28
+ "15": "Dewey Largo",
29
+ "16": "Doctor Hibbert",
30
+ "17": "Doctor Nick",
31
+ "18": "Dolph Starbeam",
32
+ "19": "Drederick Tatum",
33
+ "20": "Edna Krabappel",
34
+ "21": "Fat Tony",
35
+ "22": "Focus Group Manager",
36
+ "23": "Gil Gunderson",
37
+ "24": "Grandpa Simpson",
38
+ "25": "Groundskeeper Willie",
39
+ "26": "Hank Scorpio",
40
+ "27": "Hans Moleman",
41
+ "28": "Helen Lovejoy",
42
+ "29": "Homer Simpson",
43
+ "30": "Jailbird Snake",
44
+ "31": "Janey Powell",
45
+ "32": "Jasper Beardsley",
46
+ "33": "Jessica Lovejoy",
47
+ "34": "Jimbo Jones",
48
+ "35": "Kearney Zzyzwicz",
49
+ "36": "Kent Brockman",
50
+ "37": "Kim Basinger",
51
+ "38": "Kirk Van Houten",
52
+ "39": "Krusty the Clown",
53
+ "40": "Larry Burns",
54
+ "41": "Laura Powers",
55
+ "42": "Lenny Leonard",
56
+ "43": "Lewis Clark",
57
+ "44": "Lindsey Naegle",
58
+ "45": "Lionel Hutz",
59
+ "46": "Lisa Simpson",
60
+ "47": "Luann Van Houten",
61
+ "48": "Maggie Simpson",
62
+ "49": "Manjula Nahasapeemapetilon",
63
+ "50": "Marge Simpson",
64
+ "51": "Martin Prince",
65
+ "52": "Maude Flanders",
66
+ "53": "Mayor Quimby",
67
+ "54": "Milhouse Van Houten",
68
+ "55": "Mindy Simmons",
69
+ "56": "Miss Hoover",
70
+ "57": "Moe Szyslak",
71
+ "58": "Mr Burns",
72
+ "59": "Ned Flanders",
73
+ "60": "Nelson Muntz",
74
+ "61": "Nerd Database",
75
+ "62": "Officer Eddie",
76
+ "63": "Officer Lou",
77
+ "64": "Otto Mann",
78
+ "65": "Patty Bouvier",
79
+ "66": "Principal Skinner",
80
+ "67": "Professor Frink",
81
+ "68": "Rainier Wolfcastle",
82
+ "69": "Ralph Wiggum",
83
+ "70": "Reverend Lovejoy",
84
+ "71": "Rod Flanders",
85
+ "72": "Ruth Powers",
86
+ "73": "Sea Captian",
87
+ "74": "Selma Bouvier",
88
+ "75": "Sherri and Terri",
89
+ "76": "Sideshow Bob",
90
+ "77": "Sideshow Mel",
91
+ "78": "Superintendent Chalmers",
92
+ "79": "Todd Flanders",
93
+ "80": "Troy McClure",
94
+ "81": "Waylon Smithers",
95
+ "82": "Wendell Borton"
96
+ },
97
+ "image_size": 224,
98
+ "initializer_range": 0.02,
99
+ "intermediate_size": 3072,
100
+ "label2id": {
101
+ "Agnes Skinner": 0,
102
+ "Alec Baldwin": 1,
103
+ "Alex Whitney": 2,
104
+ "Allison Taylor": 3,
105
+ "Apu Nahasapeemapetilon": 4,
106
+ "Artie Ziff": 5,
107
+ "Ashley Grant": 6,
108
+ "Ballet Teacher": 7,
109
+ "Barney Gumble": 8,
110
+ "Bart Simpson": 9,
111
+ "Bernice Hibbert": 10,
112
+ "Carl Carlson": 11,
113
+ "Chief Wiggum": 12,
114
+ "Cletus Spuckler": 13,
115
+ "Comic Book Guy": 14,
116
+ "Dewey Largo": 15,
117
+ "Doctor Hibbert": 16,
118
+ "Doctor Nick": 17,
119
+ "Dolph Starbeam": 18,
120
+ "Drederick Tatum": 19,
121
+ "Edna Krabappel": 20,
122
+ "Fat Tony": 21,
123
+ "Focus Group Manager": 22,
124
+ "Gil Gunderson": 23,
125
+ "Grandpa Simpson": 24,
126
+ "Groundskeeper Willie": 25,
127
+ "Hank Scorpio": 26,
128
+ "Hans Moleman": 27,
129
+ "Helen Lovejoy": 28,
130
+ "Homer Simpson": 29,
131
+ "Jailbird Snake": 30,
132
+ "Janey Powell": 31,
133
+ "Jasper Beardsley": 32,
134
+ "Jessica Lovejoy": 33,
135
+ "Jimbo Jones": 34,
136
+ "Kearney Zzyzwicz": 35,
137
+ "Kent Brockman": 36,
138
+ "Kim Basinger": 37,
139
+ "Kirk Van Houten": 38,
140
+ "Krusty the Clown": 39,
141
+ "Larry Burns": 40,
142
+ "Laura Powers": 41,
143
+ "Lenny Leonard": 42,
144
+ "Lewis Clark": 43,
145
+ "Lindsey Naegle": 44,
146
+ "Lionel Hutz": 45,
147
+ "Lisa Simpson": 46,
148
+ "Luann Van Houten": 47,
149
+ "Maggie Simpson": 48,
150
+ "Manjula Nahasapeemapetilon": 49,
151
+ "Marge Simpson": 50,
152
+ "Martin Prince": 51,
153
+ "Maude Flanders": 52,
154
+ "Mayor Quimby": 53,
155
+ "Milhouse Van Houten": 54,
156
+ "Mindy Simmons": 55,
157
+ "Miss Hoover": 56,
158
+ "Moe Szyslak": 57,
159
+ "Mr Burns": 58,
160
+ "Ned Flanders": 59,
161
+ "Nelson Muntz": 60,
162
+ "Nerd Database": 61,
163
+ "Officer Eddie": 62,
164
+ "Officer Lou": 63,
165
+ "Otto Mann": 64,
166
+ "Patty Bouvier": 65,
167
+ "Principal Skinner": 66,
168
+ "Professor Frink": 67,
169
+ "Rainier Wolfcastle": 68,
170
+ "Ralph Wiggum": 69,
171
+ "Reverend Lovejoy": 70,
172
+ "Rod Flanders": 71,
173
+ "Ruth Powers": 72,
174
+ "Sea Captian": 73,
175
+ "Selma Bouvier": 74,
176
+ "Sherri and Terri": 75,
177
+ "Sideshow Bob": 76,
178
+ "Sideshow Mel": 77,
179
+ "Superintendent Chalmers": 78,
180
+ "Todd Flanders": 79,
181
+ "Troy McClure": 80,
182
+ "Waylon Smithers": 81,
183
+ "Wendell Borton": 82
184
+ },
185
+ "layer_norm_eps": 1e-12,
186
+ "model_type": "vit",
187
+ "num_attention_heads": 12,
188
+ "num_channels": 3,
189
+ "num_hidden_layers": 12,
190
+ "patch_size": 16,
191
+ "problem_type": "single_label_classification",
192
+ "qkv_bias": true,
193
+ "torch_dtype": "float32",
194
+ "transformers_version": "4.41.0"
195
+ }
checkpoint-3839/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68313e8f772280708ca9f54261e838fd8237b2d20f4e82079e5e2b5ef3bed02f
3
+ size 343473140
checkpoint-3839/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b8fc20b1a72518867203aa4091f6c177b5640f0431dee6b4cee5b1ef29ab81
3
+ size 687067194
checkpoint-3839/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc193a0b071cb0915cab365ef1cb8841cc170039089bd91190ace1109fefa81
3
+ size 14244
checkpoint-3839/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c5d4e93e348960ed9632357a4ff4796cdb428f23dbfb440b56d7dd2826a72ee
3
+ size 1064
checkpoint-3839/trainer_state.json ADDED
@@ -0,0 +1,1815 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.011031342670321465,
3
+ "best_model_checkpoint": "autotrain-mb2mv-qdf75/checkpoint-3839",
4
+ "epoch": 11.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3839,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04871060171919771,
13
+ "grad_norm": 12.027495384216309,
14
+ "learning_rate": 2.028639618138425e-06,
15
+ "loss": 4.4947,
16
+ "step": 17
17
+ },
18
+ {
19
+ "epoch": 0.09742120343839542,
20
+ "grad_norm": 11.511441230773926,
21
+ "learning_rate": 4.05727923627685e-06,
22
+ "loss": 4.4912,
23
+ "step": 34
24
+ },
25
+ {
26
+ "epoch": 0.14613180515759314,
27
+ "grad_norm": 12.500868797302246,
28
+ "learning_rate": 5.966587112171838e-06,
29
+ "loss": 4.4842,
30
+ "step": 51
31
+ },
32
+ {
33
+ "epoch": 0.19484240687679083,
34
+ "grad_norm": 11.219860076904297,
35
+ "learning_rate": 7.995226730310263e-06,
36
+ "loss": 4.4186,
37
+ "step": 68
38
+ },
39
+ {
40
+ "epoch": 0.24355300859598855,
41
+ "grad_norm": 11.510499000549316,
42
+ "learning_rate": 1.0023866348448688e-05,
43
+ "loss": 4.5182,
44
+ "step": 85
45
+ },
46
+ {
47
+ "epoch": 0.2922636103151863,
48
+ "grad_norm": 12.528426170349121,
49
+ "learning_rate": 1.2052505966587113e-05,
50
+ "loss": 4.4535,
51
+ "step": 102
52
+ },
53
+ {
54
+ "epoch": 0.34097421203438394,
55
+ "grad_norm": 11.191871643066406,
56
+ "learning_rate": 1.4081145584725539e-05,
57
+ "loss": 4.4233,
58
+ "step": 119
59
+ },
60
+ {
61
+ "epoch": 0.38968481375358166,
62
+ "grad_norm": 13.019774436950684,
63
+ "learning_rate": 1.6109785202863962e-05,
64
+ "loss": 4.3845,
65
+ "step": 136
66
+ },
67
+ {
68
+ "epoch": 0.4383954154727794,
69
+ "grad_norm": 11.506011962890625,
70
+ "learning_rate": 1.8138424821002386e-05,
71
+ "loss": 4.3334,
72
+ "step": 153
73
+ },
74
+ {
75
+ "epoch": 0.4871060171919771,
76
+ "grad_norm": 11.238750457763672,
77
+ "learning_rate": 2.0167064439140813e-05,
78
+ "loss": 4.2554,
79
+ "step": 170
80
+ },
81
+ {
82
+ "epoch": 0.5358166189111748,
83
+ "grad_norm": 11.771519660949707,
84
+ "learning_rate": 2.2195704057279237e-05,
85
+ "loss": 4.2028,
86
+ "step": 187
87
+ },
88
+ {
89
+ "epoch": 0.5845272206303725,
90
+ "grad_norm": 12.988870620727539,
91
+ "learning_rate": 2.4224343675417664e-05,
92
+ "loss": 4.0815,
93
+ "step": 204
94
+ },
95
+ {
96
+ "epoch": 0.6332378223495702,
97
+ "grad_norm": 12.157154083251953,
98
+ "learning_rate": 2.6252983293556088e-05,
99
+ "loss": 4.1055,
100
+ "step": 221
101
+ },
102
+ {
103
+ "epoch": 0.6819484240687679,
104
+ "grad_norm": 12.214086532592773,
105
+ "learning_rate": 2.828162291169451e-05,
106
+ "loss": 3.9383,
107
+ "step": 238
108
+ },
109
+ {
110
+ "epoch": 0.7306590257879656,
111
+ "grad_norm": 11.511253356933594,
112
+ "learning_rate": 3.031026252983294e-05,
113
+ "loss": 3.9894,
114
+ "step": 255
115
+ },
116
+ {
117
+ "epoch": 0.7793696275071633,
118
+ "grad_norm": 13.291036605834961,
119
+ "learning_rate": 3.233890214797136e-05,
120
+ "loss": 3.7944,
121
+ "step": 272
122
+ },
123
+ {
124
+ "epoch": 0.828080229226361,
125
+ "grad_norm": 13.204787254333496,
126
+ "learning_rate": 3.424821002386635e-05,
127
+ "loss": 3.7806,
128
+ "step": 289
129
+ },
130
+ {
131
+ "epoch": 0.8767908309455588,
132
+ "grad_norm": 13.473506927490234,
133
+ "learning_rate": 3.627684964200477e-05,
134
+ "loss": 3.6716,
135
+ "step": 306
136
+ },
137
+ {
138
+ "epoch": 0.9255014326647565,
139
+ "grad_norm": 12.780734062194824,
140
+ "learning_rate": 3.83054892601432e-05,
141
+ "loss": 3.4641,
142
+ "step": 323
143
+ },
144
+ {
145
+ "epoch": 0.9742120343839542,
146
+ "grad_norm": 20.36208724975586,
147
+ "learning_rate": 4.0334128878281626e-05,
148
+ "loss": 3.3018,
149
+ "step": 340
150
+ },
151
+ {
152
+ "epoch": 1.0,
153
+ "eval_accuracy": 0.43902439024390244,
154
+ "eval_f1_macro": 0.3762682151157721,
155
+ "eval_f1_micro": 0.43902439024390244,
156
+ "eval_f1_weighted": 0.38500671624754507,
157
+ "eval_loss": 2.9648730754852295,
158
+ "eval_precision_macro": 0.5231114557498497,
159
+ "eval_precision_micro": 0.43902439024390244,
160
+ "eval_precision_weighted": 0.5343145132587477,
161
+ "eval_recall_macro": 0.41928580121351205,
162
+ "eval_recall_micro": 0.43902439024390244,
163
+ "eval_recall_weighted": 0.43902439024390244,
164
+ "eval_runtime": 3.4206,
165
+ "eval_samples_per_second": 203.767,
166
+ "eval_steps_per_second": 12.863,
167
+ "step": 349
168
+ },
169
+ {
170
+ "epoch": 1.0229226361031518,
171
+ "grad_norm": 11.021674156188965,
172
+ "learning_rate": 4.236276849642005e-05,
173
+ "loss": 3.1839,
174
+ "step": 357
175
+ },
176
+ {
177
+ "epoch": 1.0716332378223496,
178
+ "grad_norm": 12.111454963684082,
179
+ "learning_rate": 4.4391408114558474e-05,
180
+ "loss": 3.0222,
181
+ "step": 374
182
+ },
183
+ {
184
+ "epoch": 1.1203438395415473,
185
+ "grad_norm": 12.602492332458496,
186
+ "learning_rate": 4.64200477326969e-05,
187
+ "loss": 2.8135,
188
+ "step": 391
189
+ },
190
+ {
191
+ "epoch": 1.1690544412607449,
192
+ "grad_norm": 11.924349784851074,
193
+ "learning_rate": 4.844868735083533e-05,
194
+ "loss": 2.6733,
195
+ "step": 408
196
+ },
197
+ {
198
+ "epoch": 1.2177650429799427,
199
+ "grad_norm": 11.714224815368652,
200
+ "learning_rate": 4.9946935526664904e-05,
201
+ "loss": 2.4497,
202
+ "step": 425
203
+ },
204
+ {
205
+ "epoch": 1.2664756446991405,
206
+ "grad_norm": 13.355523109436035,
207
+ "learning_rate": 4.972141151499072e-05,
208
+ "loss": 2.3633,
209
+ "step": 442
210
+ },
211
+ {
212
+ "epoch": 1.3151862464183381,
213
+ "grad_norm": 11.773361206054688,
214
+ "learning_rate": 4.949588750331653e-05,
215
+ "loss": 2.2063,
216
+ "step": 459
217
+ },
218
+ {
219
+ "epoch": 1.3638968481375358,
220
+ "grad_norm": 12.601265907287598,
221
+ "learning_rate": 4.9270363491642345e-05,
222
+ "loss": 2.2622,
223
+ "step": 476
224
+ },
225
+ {
226
+ "epoch": 1.4126074498567336,
227
+ "grad_norm": 11.489968299865723,
228
+ "learning_rate": 4.9044839479968165e-05,
229
+ "loss": 2.0349,
230
+ "step": 493
231
+ },
232
+ {
233
+ "epoch": 1.4613180515759312,
234
+ "grad_norm": 11.553853034973145,
235
+ "learning_rate": 4.881931546829398e-05,
236
+ "loss": 1.8452,
237
+ "step": 510
238
+ },
239
+ {
240
+ "epoch": 1.5100286532951288,
241
+ "grad_norm": 13.461709976196289,
242
+ "learning_rate": 4.85937914566198e-05,
243
+ "loss": 1.7711,
244
+ "step": 527
245
+ },
246
+ {
247
+ "epoch": 1.5587392550143266,
248
+ "grad_norm": 11.289037704467773,
249
+ "learning_rate": 4.8368267444945606e-05,
250
+ "loss": 1.562,
251
+ "step": 544
252
+ },
253
+ {
254
+ "epoch": 1.6074498567335245,
255
+ "grad_norm": 9.85034465789795,
256
+ "learning_rate": 4.814274343327143e-05,
257
+ "loss": 1.4258,
258
+ "step": 561
259
+ },
260
+ {
261
+ "epoch": 1.656160458452722,
262
+ "grad_norm": 8.626704216003418,
263
+ "learning_rate": 4.791721942159724e-05,
264
+ "loss": 1.2561,
265
+ "step": 578
266
+ },
267
+ {
268
+ "epoch": 1.7048710601719197,
269
+ "grad_norm": 8.728435516357422,
270
+ "learning_rate": 4.769169540992306e-05,
271
+ "loss": 1.229,
272
+ "step": 595
273
+ },
274
+ {
275
+ "epoch": 1.7535816618911175,
276
+ "grad_norm": 11.541463851928711,
277
+ "learning_rate": 4.7466171398248875e-05,
278
+ "loss": 1.175,
279
+ "step": 612
280
+ },
281
+ {
282
+ "epoch": 1.8022922636103151,
283
+ "grad_norm": 11.69316291809082,
284
+ "learning_rate": 4.724064738657469e-05,
285
+ "loss": 1.2961,
286
+ "step": 629
287
+ },
288
+ {
289
+ "epoch": 1.8510028653295127,
290
+ "grad_norm": 7.8675737380981445,
291
+ "learning_rate": 4.70151233749005e-05,
292
+ "loss": 0.9699,
293
+ "step": 646
294
+ },
295
+ {
296
+ "epoch": 1.8997134670487106,
297
+ "grad_norm": 7.240268230438232,
298
+ "learning_rate": 4.678959936322632e-05,
299
+ "loss": 1.235,
300
+ "step": 663
301
+ },
302
+ {
303
+ "epoch": 1.9484240687679084,
304
+ "grad_norm": 6.3042426109313965,
305
+ "learning_rate": 4.6564075351552136e-05,
306
+ "loss": 1.1121,
307
+ "step": 680
308
+ },
309
+ {
310
+ "epoch": 1.997134670487106,
311
+ "grad_norm": 8.941567420959473,
312
+ "learning_rate": 4.633855133987796e-05,
313
+ "loss": 0.9363,
314
+ "step": 697
315
+ },
316
+ {
317
+ "epoch": 2.0,
318
+ "eval_accuracy": 0.945480631276901,
319
+ "eval_f1_macro": 0.9425017115213628,
320
+ "eval_f1_micro": 0.945480631276901,
321
+ "eval_f1_weighted": 0.9445694655877982,
322
+ "eval_loss": 0.5551677346229553,
323
+ "eval_precision_macro": 0.9567204214794576,
324
+ "eval_precision_micro": 0.945480631276901,
325
+ "eval_precision_weighted": 0.9597403513113699,
326
+ "eval_recall_macro": 0.94451833307255,
327
+ "eval_recall_micro": 0.945480631276901,
328
+ "eval_recall_weighted": 0.945480631276901,
329
+ "eval_runtime": 3.3936,
330
+ "eval_samples_per_second": 205.389,
331
+ "eval_steps_per_second": 12.966,
332
+ "step": 698
333
+ },
334
+ {
335
+ "epoch": 2.0458452722063036,
336
+ "grad_norm": 4.50822114944458,
337
+ "learning_rate": 4.611302732820377e-05,
338
+ "loss": 1.0456,
339
+ "step": 714
340
+ },
341
+ {
342
+ "epoch": 2.0945558739255015,
343
+ "grad_norm": 5.0435686111450195,
344
+ "learning_rate": 4.5887503316529584e-05,
345
+ "loss": 0.5131,
346
+ "step": 731
347
+ },
348
+ {
349
+ "epoch": 2.1432664756446993,
350
+ "grad_norm": 5.728665351867676,
351
+ "learning_rate": 4.5661979304855405e-05,
352
+ "loss": 0.705,
353
+ "step": 748
354
+ },
355
+ {
356
+ "epoch": 2.1919770773638967,
357
+ "grad_norm": 4.446041584014893,
358
+ "learning_rate": 4.543645529318122e-05,
359
+ "loss": 0.6504,
360
+ "step": 765
361
+ },
362
+ {
363
+ "epoch": 2.2406876790830945,
364
+ "grad_norm": 5.601328372955322,
365
+ "learning_rate": 4.521093128150703e-05,
366
+ "loss": 0.6361,
367
+ "step": 782
368
+ },
369
+ {
370
+ "epoch": 2.2893982808022924,
371
+ "grad_norm": 6.830723285675049,
372
+ "learning_rate": 4.4985407269832846e-05,
373
+ "loss": 0.7554,
374
+ "step": 799
375
+ },
376
+ {
377
+ "epoch": 2.3381088825214897,
378
+ "grad_norm": 9.851592063903809,
379
+ "learning_rate": 4.4759883258158666e-05,
380
+ "loss": 0.6952,
381
+ "step": 816
382
+ },
383
+ {
384
+ "epoch": 2.3868194842406876,
385
+ "grad_norm": 12.239367485046387,
386
+ "learning_rate": 4.453435924648448e-05,
387
+ "loss": 0.7793,
388
+ "step": 833
389
+ },
390
+ {
391
+ "epoch": 2.4355300859598854,
392
+ "grad_norm": 4.4265360832214355,
393
+ "learning_rate": 4.43088352348103e-05,
394
+ "loss": 0.5841,
395
+ "step": 850
396
+ },
397
+ {
398
+ "epoch": 2.4842406876790832,
399
+ "grad_norm": 11.818037986755371,
400
+ "learning_rate": 4.4083311223136114e-05,
401
+ "loss": 0.6259,
402
+ "step": 867
403
+ },
404
+ {
405
+ "epoch": 2.532951289398281,
406
+ "grad_norm": 13.520539283752441,
407
+ "learning_rate": 4.385778721146193e-05,
408
+ "loss": 0.674,
409
+ "step": 884
410
+ },
411
+ {
412
+ "epoch": 2.5816618911174785,
413
+ "grad_norm": 12.981139183044434,
414
+ "learning_rate": 4.363226319978774e-05,
415
+ "loss": 0.7362,
416
+ "step": 901
417
+ },
418
+ {
419
+ "epoch": 2.6303724928366763,
420
+ "grad_norm": 12.412555694580078,
421
+ "learning_rate": 4.340673918811356e-05,
422
+ "loss": 0.557,
423
+ "step": 918
424
+ },
425
+ {
426
+ "epoch": 2.6790830945558737,
427
+ "grad_norm": 7.840790748596191,
428
+ "learning_rate": 4.3181215176439375e-05,
429
+ "loss": 0.6293,
430
+ "step": 935
431
+ },
432
+ {
433
+ "epoch": 2.7277936962750715,
434
+ "grad_norm": 4.981259822845459,
435
+ "learning_rate": 4.2955691164765196e-05,
436
+ "loss": 0.5793,
437
+ "step": 952
438
+ },
439
+ {
440
+ "epoch": 2.7765042979942693,
441
+ "grad_norm": 7.165767669677734,
442
+ "learning_rate": 4.2730167153091e-05,
443
+ "loss": 0.5705,
444
+ "step": 969
445
+ },
446
+ {
447
+ "epoch": 2.825214899713467,
448
+ "grad_norm": 6.7298736572265625,
449
+ "learning_rate": 4.250464314141682e-05,
450
+ "loss": 0.5446,
451
+ "step": 986
452
+ },
453
+ {
454
+ "epoch": 2.873925501432665,
455
+ "grad_norm": 7.268840789794922,
456
+ "learning_rate": 4.227911912974264e-05,
457
+ "loss": 0.4438,
458
+ "step": 1003
459
+ },
460
+ {
461
+ "epoch": 2.9226361031518624,
462
+ "grad_norm": 1.802043080329895,
463
+ "learning_rate": 4.205359511806846e-05,
464
+ "loss": 0.4892,
465
+ "step": 1020
466
+ },
467
+ {
468
+ "epoch": 2.9713467048710602,
469
+ "grad_norm": 10.247010231018066,
470
+ "learning_rate": 4.182807110639427e-05,
471
+ "loss": 0.4922,
472
+ "step": 1037
473
+ },
474
+ {
475
+ "epoch": 3.0,
476
+ "eval_accuracy": 0.9727403156384505,
477
+ "eval_f1_macro": 0.9705769072502259,
478
+ "eval_f1_micro": 0.9727403156384505,
479
+ "eval_f1_weighted": 0.9719785983620151,
480
+ "eval_loss": 0.17863501608371735,
481
+ "eval_precision_macro": 0.9795910916392845,
482
+ "eval_precision_micro": 0.9727403156384505,
483
+ "eval_precision_weighted": 0.9787328087184615,
484
+ "eval_recall_macro": 0.969774335436986,
485
+ "eval_recall_micro": 0.9727403156384505,
486
+ "eval_recall_weighted": 0.9727403156384505,
487
+ "eval_runtime": 3.4979,
488
+ "eval_samples_per_second": 199.263,
489
+ "eval_steps_per_second": 12.579,
490
+ "step": 1047
491
+ },
492
+ {
493
+ "epoch": 3.020057306590258,
494
+ "grad_norm": 9.481998443603516,
495
+ "learning_rate": 4.1602547094720085e-05,
496
+ "loss": 0.5422,
497
+ "step": 1054
498
+ },
499
+ {
500
+ "epoch": 3.0687679083094554,
501
+ "grad_norm": 9.671106338500977,
502
+ "learning_rate": 4.13770230830459e-05,
503
+ "loss": 0.408,
504
+ "step": 1071
505
+ },
506
+ {
507
+ "epoch": 3.1174785100286533,
508
+ "grad_norm": 1.3068506717681885,
509
+ "learning_rate": 4.115149907137172e-05,
510
+ "loss": 0.404,
511
+ "step": 1088
512
+ },
513
+ {
514
+ "epoch": 3.166189111747851,
515
+ "grad_norm": 8.020153045654297,
516
+ "learning_rate": 4.092597505969753e-05,
517
+ "loss": 0.4022,
518
+ "step": 1105
519
+ },
520
+ {
521
+ "epoch": 3.2148997134670485,
522
+ "grad_norm": 9.03290843963623,
523
+ "learning_rate": 4.070045104802335e-05,
524
+ "loss": 0.4726,
525
+ "step": 1122
526
+ },
527
+ {
528
+ "epoch": 3.2636103151862463,
529
+ "grad_norm": 8.347646713256836,
530
+ "learning_rate": 4.047492703634917e-05,
531
+ "loss": 0.4158,
532
+ "step": 1139
533
+ },
534
+ {
535
+ "epoch": 3.312320916905444,
536
+ "grad_norm": 9.76726245880127,
537
+ "learning_rate": 4.024940302467498e-05,
538
+ "loss": 0.3523,
539
+ "step": 1156
540
+ },
541
+ {
542
+ "epoch": 3.361031518624642,
543
+ "grad_norm": 8.464173316955566,
544
+ "learning_rate": 4.00238790130008e-05,
545
+ "loss": 0.5155,
546
+ "step": 1173
547
+ },
548
+ {
549
+ "epoch": 3.4097421203438394,
550
+ "grad_norm": 4.331398963928223,
551
+ "learning_rate": 3.9798355001326615e-05,
552
+ "loss": 0.395,
553
+ "step": 1190
554
+ },
555
+ {
556
+ "epoch": 3.458452722063037,
557
+ "grad_norm": 7.228985786437988,
558
+ "learning_rate": 3.9572830989652435e-05,
559
+ "loss": 0.3984,
560
+ "step": 1207
561
+ },
562
+ {
563
+ "epoch": 3.507163323782235,
564
+ "grad_norm": 10.442928314208984,
565
+ "learning_rate": 3.934730697797824e-05,
566
+ "loss": 0.4143,
567
+ "step": 1224
568
+ },
569
+ {
570
+ "epoch": 3.555873925501433,
571
+ "grad_norm": 8.429516792297363,
572
+ "learning_rate": 3.912178296630406e-05,
573
+ "loss": 0.3767,
574
+ "step": 1241
575
+ },
576
+ {
577
+ "epoch": 3.6045845272206303,
578
+ "grad_norm": 12.501051902770996,
579
+ "learning_rate": 3.8896258954629876e-05,
580
+ "loss": 0.3507,
581
+ "step": 1258
582
+ },
583
+ {
584
+ "epoch": 3.653295128939828,
585
+ "grad_norm": 7.7675652503967285,
586
+ "learning_rate": 3.86707349429557e-05,
587
+ "loss": 0.395,
588
+ "step": 1275
589
+ },
590
+ {
591
+ "epoch": 3.702005730659026,
592
+ "grad_norm": 4.184613227844238,
593
+ "learning_rate": 3.844521093128151e-05,
594
+ "loss": 0.2874,
595
+ "step": 1292
596
+ },
597
+ {
598
+ "epoch": 3.7507163323782233,
599
+ "grad_norm": 10.189749717712402,
600
+ "learning_rate": 3.8219686919607324e-05,
601
+ "loss": 0.3396,
602
+ "step": 1309
603
+ },
604
+ {
605
+ "epoch": 3.799426934097421,
606
+ "grad_norm": 2.022300958633423,
607
+ "learning_rate": 3.799416290793314e-05,
608
+ "loss": 0.2436,
609
+ "step": 1326
610
+ },
611
+ {
612
+ "epoch": 3.848137535816619,
613
+ "grad_norm": 8.71822452545166,
614
+ "learning_rate": 3.776863889625896e-05,
615
+ "loss": 0.3773,
616
+ "step": 1343
617
+ },
618
+ {
619
+ "epoch": 3.896848137535817,
620
+ "grad_norm": 7.879873752593994,
621
+ "learning_rate": 3.754311488458477e-05,
622
+ "loss": 0.4686,
623
+ "step": 1360
624
+ },
625
+ {
626
+ "epoch": 3.945558739255014,
627
+ "grad_norm": 1.0487672090530396,
628
+ "learning_rate": 3.731759087291059e-05,
629
+ "loss": 0.3921,
630
+ "step": 1377
631
+ },
632
+ {
633
+ "epoch": 3.994269340974212,
634
+ "grad_norm": 8.260384559631348,
635
+ "learning_rate": 3.70920668612364e-05,
636
+ "loss": 0.2956,
637
+ "step": 1394
638
+ },
639
+ {
640
+ "epoch": 4.0,
641
+ "eval_accuracy": 0.9885222381635581,
642
+ "eval_f1_macro": 0.9872476382417075,
643
+ "eval_f1_micro": 0.9885222381635581,
644
+ "eval_f1_weighted": 0.9882948425801252,
645
+ "eval_loss": 0.09004738181829453,
646
+ "eval_precision_macro": 0.9903644882560545,
647
+ "eval_precision_micro": 0.9885222381635581,
648
+ "eval_precision_weighted": 0.9902174543135807,
649
+ "eval_recall_macro": 0.9865557467967108,
650
+ "eval_recall_micro": 0.9885222381635581,
651
+ "eval_recall_weighted": 0.9885222381635581,
652
+ "eval_runtime": 3.3411,
653
+ "eval_samples_per_second": 208.616,
654
+ "eval_steps_per_second": 13.169,
655
+ "step": 1396
656
+ },
657
+ {
658
+ "epoch": 4.042979942693409,
659
+ "grad_norm": 6.5528106689453125,
660
+ "learning_rate": 3.686654284956222e-05,
661
+ "loss": 0.4403,
662
+ "step": 1411
663
+ },
664
+ {
665
+ "epoch": 4.091690544412607,
666
+ "grad_norm": 3.8414504528045654,
667
+ "learning_rate": 3.6641018837888034e-05,
668
+ "loss": 0.2637,
669
+ "step": 1428
670
+ },
671
+ {
672
+ "epoch": 4.140401146131805,
673
+ "grad_norm": 16.609180450439453,
674
+ "learning_rate": 3.6415494826213854e-05,
675
+ "loss": 0.3618,
676
+ "step": 1445
677
+ },
678
+ {
679
+ "epoch": 4.189111747851003,
680
+ "grad_norm": 2.179348945617676,
681
+ "learning_rate": 3.618997081453967e-05,
682
+ "loss": 0.4447,
683
+ "step": 1462
684
+ },
685
+ {
686
+ "epoch": 4.237822349570201,
687
+ "grad_norm": 3.5908546447753906,
688
+ "learning_rate": 3.596444680286548e-05,
689
+ "loss": 0.2905,
690
+ "step": 1479
691
+ },
692
+ {
693
+ "epoch": 4.286532951289399,
694
+ "grad_norm": 7.550769805908203,
695
+ "learning_rate": 3.5738922791191295e-05,
696
+ "loss": 0.2448,
697
+ "step": 1496
698
+ },
699
+ {
700
+ "epoch": 4.3352435530085955,
701
+ "grad_norm": 0.7109397649765015,
702
+ "learning_rate": 3.5513398779517116e-05,
703
+ "loss": 0.2127,
704
+ "step": 1513
705
+ },
706
+ {
707
+ "epoch": 4.383954154727793,
708
+ "grad_norm": 1.54320228099823,
709
+ "learning_rate": 3.528787476784293e-05,
710
+ "loss": 0.2202,
711
+ "step": 1530
712
+ },
713
+ {
714
+ "epoch": 4.432664756446991,
715
+ "grad_norm": 10.156286239624023,
716
+ "learning_rate": 3.506235075616875e-05,
717
+ "loss": 0.3263,
718
+ "step": 1547
719
+ },
720
+ {
721
+ "epoch": 4.481375358166189,
722
+ "grad_norm": 11.149276733398438,
723
+ "learning_rate": 3.4836826744494563e-05,
724
+ "loss": 0.213,
725
+ "step": 1564
726
+ },
727
+ {
728
+ "epoch": 4.530085959885387,
729
+ "grad_norm": 14.087788581848145,
730
+ "learning_rate": 3.461130273282038e-05,
731
+ "loss": 0.3907,
732
+ "step": 1581
733
+ },
734
+ {
735
+ "epoch": 4.578796561604585,
736
+ "grad_norm": 6.006841659545898,
737
+ "learning_rate": 3.43857787211462e-05,
738
+ "loss": 0.4959,
739
+ "step": 1598
740
+ },
741
+ {
742
+ "epoch": 4.6275071633237825,
743
+ "grad_norm": 6.818835258483887,
744
+ "learning_rate": 3.416025470947201e-05,
745
+ "loss": 0.3309,
746
+ "step": 1615
747
+ },
748
+ {
749
+ "epoch": 4.6762177650429795,
750
+ "grad_norm": 1.0696688890457153,
751
+ "learning_rate": 3.393473069779783e-05,
752
+ "loss": 0.2245,
753
+ "step": 1632
754
+ },
755
+ {
756
+ "epoch": 4.724928366762177,
757
+ "grad_norm": 11.383952140808105,
758
+ "learning_rate": 3.370920668612364e-05,
759
+ "loss": 0.3473,
760
+ "step": 1649
761
+ },
762
+ {
763
+ "epoch": 4.773638968481375,
764
+ "grad_norm": 7.438843727111816,
765
+ "learning_rate": 3.348368267444946e-05,
766
+ "loss": 0.2508,
767
+ "step": 1666
768
+ },
769
+ {
770
+ "epoch": 4.822349570200573,
771
+ "grad_norm": 1.553702473640442,
772
+ "learning_rate": 3.325815866277527e-05,
773
+ "loss": 0.2669,
774
+ "step": 1683
775
+ },
776
+ {
777
+ "epoch": 4.871060171919771,
778
+ "grad_norm": 5.968568325042725,
779
+ "learning_rate": 3.303263465110109e-05,
780
+ "loss": 0.1219,
781
+ "step": 1700
782
+ },
783
+ {
784
+ "epoch": 4.919770773638969,
785
+ "grad_norm": 0.3757087290287018,
786
+ "learning_rate": 3.280711063942691e-05,
787
+ "loss": 0.2749,
788
+ "step": 1717
789
+ },
790
+ {
791
+ "epoch": 4.9684813753581665,
792
+ "grad_norm": 7.143729209899902,
793
+ "learning_rate": 3.258158662775272e-05,
794
+ "loss": 0.1591,
795
+ "step": 1734
796
+ },
797
+ {
798
+ "epoch": 5.0,
799
+ "eval_accuracy": 0.9885222381635581,
800
+ "eval_f1_macro": 0.9880046003912989,
801
+ "eval_f1_micro": 0.9885222381635581,
802
+ "eval_f1_weighted": 0.9883902987879117,
803
+ "eval_loss": 0.05691728740930557,
804
+ "eval_precision_macro": 0.9900693683826214,
805
+ "eval_precision_micro": 0.9885222381635581,
806
+ "eval_precision_weighted": 0.9900236221613553,
807
+ "eval_recall_macro": 0.9879231210556513,
808
+ "eval_recall_micro": 0.9885222381635581,
809
+ "eval_recall_weighted": 0.9885222381635581,
810
+ "eval_runtime": 3.3973,
811
+ "eval_samples_per_second": 205.165,
812
+ "eval_steps_per_second": 12.952,
813
+ "step": 1745
814
+ },
815
+ {
816
+ "epoch": 5.017191977077364,
817
+ "grad_norm": 1.9127634763717651,
818
+ "learning_rate": 3.2356062616078534e-05,
819
+ "loss": 0.397,
820
+ "step": 1751
821
+ },
822
+ {
823
+ "epoch": 5.065902578796561,
824
+ "grad_norm": 8.336675643920898,
825
+ "learning_rate": 3.2130538604404355e-05,
826
+ "loss": 0.2355,
827
+ "step": 1768
828
+ },
829
+ {
830
+ "epoch": 5.114613180515759,
831
+ "grad_norm": 7.159496307373047,
832
+ "learning_rate": 3.190501459273017e-05,
833
+ "loss": 0.2086,
834
+ "step": 1785
835
+ },
836
+ {
837
+ "epoch": 5.163323782234957,
838
+ "grad_norm": 6.03056526184082,
839
+ "learning_rate": 3.167949058105599e-05,
840
+ "loss": 0.2212,
841
+ "step": 1802
842
+ },
843
+ {
844
+ "epoch": 5.212034383954155,
845
+ "grad_norm": 7.681415557861328,
846
+ "learning_rate": 3.1453966569381796e-05,
847
+ "loss": 0.2721,
848
+ "step": 1819
849
+ },
850
+ {
851
+ "epoch": 5.260744985673353,
852
+ "grad_norm": 1.4911251068115234,
853
+ "learning_rate": 3.1228442557707616e-05,
854
+ "loss": 0.1994,
855
+ "step": 1836
856
+ },
857
+ {
858
+ "epoch": 5.30945558739255,
859
+ "grad_norm": 7.99345588684082,
860
+ "learning_rate": 3.100291854603343e-05,
861
+ "loss": 0.312,
862
+ "step": 1853
863
+ },
864
+ {
865
+ "epoch": 5.358166189111748,
866
+ "grad_norm": 3.288712978363037,
867
+ "learning_rate": 3.077739453435925e-05,
868
+ "loss": 0.3002,
869
+ "step": 1870
870
+ },
871
+ {
872
+ "epoch": 5.406876790830945,
873
+ "grad_norm": 0.1384359449148178,
874
+ "learning_rate": 3.0551870522685064e-05,
875
+ "loss": 0.1875,
876
+ "step": 1887
877
+ },
878
+ {
879
+ "epoch": 5.455587392550143,
880
+ "grad_norm": 2.912055730819702,
881
+ "learning_rate": 3.0326346511010878e-05,
882
+ "loss": 0.1617,
883
+ "step": 1904
884
+ },
885
+ {
886
+ "epoch": 5.504297994269341,
887
+ "grad_norm": 9.510294914245605,
888
+ "learning_rate": 3.0100822499336695e-05,
889
+ "loss": 0.1553,
890
+ "step": 1921
891
+ },
892
+ {
893
+ "epoch": 5.553008595988539,
894
+ "grad_norm": 5.520040988922119,
895
+ "learning_rate": 2.9875298487662512e-05,
896
+ "loss": 0.1949,
897
+ "step": 1938
898
+ },
899
+ {
900
+ "epoch": 5.6017191977077365,
901
+ "grad_norm": 0.39325079321861267,
902
+ "learning_rate": 2.964977447598833e-05,
903
+ "loss": 0.2814,
904
+ "step": 1955
905
+ },
906
+ {
907
+ "epoch": 5.650429799426934,
908
+ "grad_norm": 0.1934385895729065,
909
+ "learning_rate": 2.942425046431414e-05,
910
+ "loss": 0.3251,
911
+ "step": 1972
912
+ },
913
+ {
914
+ "epoch": 5.699140401146132,
915
+ "grad_norm": 5.8890533447265625,
916
+ "learning_rate": 2.9198726452639957e-05,
917
+ "loss": 0.1652,
918
+ "step": 1989
919
+ },
920
+ {
921
+ "epoch": 5.747851002865329,
922
+ "grad_norm": 5.028823375701904,
923
+ "learning_rate": 2.8973202440965774e-05,
924
+ "loss": 0.305,
925
+ "step": 2006
926
+ },
927
+ {
928
+ "epoch": 5.796561604584527,
929
+ "grad_norm": 0.35111504793167114,
930
+ "learning_rate": 2.874767842929159e-05,
931
+ "loss": 0.1684,
932
+ "step": 2023
933
+ },
934
+ {
935
+ "epoch": 5.845272206303725,
936
+ "grad_norm": 1.910530686378479,
937
+ "learning_rate": 2.8522154417617408e-05,
938
+ "loss": 0.2535,
939
+ "step": 2040
940
+ },
941
+ {
942
+ "epoch": 5.893982808022923,
943
+ "grad_norm": 5.5074334144592285,
944
+ "learning_rate": 2.8296630405943218e-05,
945
+ "loss": 0.2924,
946
+ "step": 2057
947
+ },
948
+ {
949
+ "epoch": 5.9426934097421205,
950
+ "grad_norm": 6.081971645355225,
951
+ "learning_rate": 2.8071106394269035e-05,
952
+ "loss": 0.1663,
953
+ "step": 2074
954
+ },
955
+ {
956
+ "epoch": 5.991404011461318,
957
+ "grad_norm": 1.7783217430114746,
958
+ "learning_rate": 2.7845582382594852e-05,
959
+ "loss": 0.1912,
960
+ "step": 2091
961
+ },
962
+ {
963
+ "epoch": 6.0,
964
+ "eval_accuracy": 0.9971305595408895,
965
+ "eval_f1_macro": 0.997156659844563,
966
+ "eval_f1_micro": 0.9971305595408895,
967
+ "eval_f1_weighted": 0.9971139798573604,
968
+ "eval_loss": 0.031095275655388832,
969
+ "eval_precision_macro": 0.9975660216624072,
970
+ "eval_precision_micro": 0.9971305595408895,
971
+ "eval_precision_weighted": 0.9974204020115067,
972
+ "eval_recall_macro": 0.9970740103270225,
973
+ "eval_recall_micro": 0.9971305595408895,
974
+ "eval_recall_weighted": 0.9971305595408895,
975
+ "eval_runtime": 3.3447,
976
+ "eval_samples_per_second": 208.387,
977
+ "eval_steps_per_second": 13.155,
978
+ "step": 2094
979
+ },
980
+ {
981
+ "epoch": 6.040114613180516,
982
+ "grad_norm": 12.544295310974121,
983
+ "learning_rate": 2.762005837092067e-05,
984
+ "loss": 0.2401,
985
+ "step": 2108
986
+ },
987
+ {
988
+ "epoch": 6.088825214899713,
989
+ "grad_norm": 0.36089888215065,
990
+ "learning_rate": 2.7394534359246486e-05,
991
+ "loss": 0.1197,
992
+ "step": 2125
993
+ },
994
+ {
995
+ "epoch": 6.137535816618911,
996
+ "grad_norm": 3.824916124343872,
997
+ "learning_rate": 2.71690103475723e-05,
998
+ "loss": 0.2304,
999
+ "step": 2142
1000
+ },
1001
+ {
1002
+ "epoch": 6.186246418338109,
1003
+ "grad_norm": 7.010196685791016,
1004
+ "learning_rate": 2.6943486335898117e-05,
1005
+ "loss": 0.2393,
1006
+ "step": 2159
1007
+ },
1008
+ {
1009
+ "epoch": 6.234957020057307,
1010
+ "grad_norm": 0.23738817870616913,
1011
+ "learning_rate": 2.6717962324223934e-05,
1012
+ "loss": 0.2878,
1013
+ "step": 2176
1014
+ },
1015
+ {
1016
+ "epoch": 6.283667621776504,
1017
+ "grad_norm": 6.685153961181641,
1018
+ "learning_rate": 2.649243831254975e-05,
1019
+ "loss": 0.23,
1020
+ "step": 2193
1021
+ },
1022
+ {
1023
+ "epoch": 6.332378223495702,
1024
+ "grad_norm": 9.155635833740234,
1025
+ "learning_rate": 2.626691430087557e-05,
1026
+ "loss": 0.127,
1027
+ "step": 2210
1028
+ },
1029
+ {
1030
+ "epoch": 6.3810888252149,
1031
+ "grad_norm": 1.8248714208602905,
1032
+ "learning_rate": 2.604139028920138e-05,
1033
+ "loss": 0.264,
1034
+ "step": 2227
1035
+ },
1036
+ {
1037
+ "epoch": 6.429799426934097,
1038
+ "grad_norm": 3.287179946899414,
1039
+ "learning_rate": 2.5815866277527196e-05,
1040
+ "loss": 0.2568,
1041
+ "step": 2244
1042
+ },
1043
+ {
1044
+ "epoch": 6.478510028653295,
1045
+ "grad_norm": 0.12249535322189331,
1046
+ "learning_rate": 2.5590342265853013e-05,
1047
+ "loss": 0.1211,
1048
+ "step": 2261
1049
+ },
1050
+ {
1051
+ "epoch": 6.527220630372493,
1052
+ "grad_norm": 9.649252891540527,
1053
+ "learning_rate": 2.536481825417883e-05,
1054
+ "loss": 0.2599,
1055
+ "step": 2278
1056
+ },
1057
+ {
1058
+ "epoch": 6.5759312320916905,
1059
+ "grad_norm": 8.501516342163086,
1060
+ "learning_rate": 2.5139294242504647e-05,
1061
+ "loss": 0.1997,
1062
+ "step": 2295
1063
+ },
1064
+ {
1065
+ "epoch": 6.624641833810888,
1066
+ "grad_norm": 6.785931587219238,
1067
+ "learning_rate": 2.491377023083046e-05,
1068
+ "loss": 0.1947,
1069
+ "step": 2312
1070
+ },
1071
+ {
1072
+ "epoch": 6.673352435530086,
1073
+ "grad_norm": 1.3328988552093506,
1074
+ "learning_rate": 2.4688246219156274e-05,
1075
+ "loss": 0.2236,
1076
+ "step": 2329
1077
+ },
1078
+ {
1079
+ "epoch": 6.722063037249284,
1080
+ "grad_norm": 9.384140014648438,
1081
+ "learning_rate": 2.446272220748209e-05,
1082
+ "loss": 0.2394,
1083
+ "step": 2346
1084
+ },
1085
+ {
1086
+ "epoch": 6.770773638968482,
1087
+ "grad_norm": 1.0058611631393433,
1088
+ "learning_rate": 2.423719819580791e-05,
1089
+ "loss": 0.1288,
1090
+ "step": 2363
1091
+ },
1092
+ {
1093
+ "epoch": 6.819484240687679,
1094
+ "grad_norm": 0.7905517816543579,
1095
+ "learning_rate": 2.4011674184133722e-05,
1096
+ "loss": 0.1528,
1097
+ "step": 2380
1098
+ },
1099
+ {
1100
+ "epoch": 6.868194842406877,
1101
+ "grad_norm": 10.827178955078125,
1102
+ "learning_rate": 2.378615017245954e-05,
1103
+ "loss": 0.1767,
1104
+ "step": 2397
1105
+ },
1106
+ {
1107
+ "epoch": 6.916905444126074,
1108
+ "grad_norm": 7.897141933441162,
1109
+ "learning_rate": 2.3560626160785353e-05,
1110
+ "loss": 0.1411,
1111
+ "step": 2414
1112
+ },
1113
+ {
1114
+ "epoch": 6.965616045845272,
1115
+ "grad_norm": 4.635827541351318,
1116
+ "learning_rate": 2.333510214911117e-05,
1117
+ "loss": 0.1712,
1118
+ "step": 2431
1119
+ },
1120
+ {
1121
+ "epoch": 7.0,
1122
+ "eval_accuracy": 0.9956958393113343,
1123
+ "eval_f1_macro": 0.9951431111442676,
1124
+ "eval_f1_micro": 0.9956958393113343,
1125
+ "eval_f1_weighted": 0.9956405011600654,
1126
+ "eval_loss": 0.024958999827504158,
1127
+ "eval_precision_macro": 0.995983935742972,
1128
+ "eval_precision_micro": 0.9956958393113343,
1129
+ "eval_precision_weighted": 0.9964929061055317,
1130
+ "eval_recall_macro": 0.9953528399311532,
1131
+ "eval_recall_micro": 0.9956958393113343,
1132
+ "eval_recall_weighted": 0.9956958393113343,
1133
+ "eval_runtime": 3.4628,
1134
+ "eval_samples_per_second": 201.281,
1135
+ "eval_steps_per_second": 12.706,
1136
+ "step": 2443
1137
+ },
1138
+ {
1139
+ "epoch": 7.01432664756447,
1140
+ "grad_norm": 0.12232652306556702,
1141
+ "learning_rate": 2.3109578137436987e-05,
1142
+ "loss": 0.2055,
1143
+ "step": 2448
1144
+ },
1145
+ {
1146
+ "epoch": 7.063037249283668,
1147
+ "grad_norm": 0.07312128692865372,
1148
+ "learning_rate": 2.28840541257628e-05,
1149
+ "loss": 0.1848,
1150
+ "step": 2465
1151
+ },
1152
+ {
1153
+ "epoch": 7.111747851002866,
1154
+ "grad_norm": 0.32181409001350403,
1155
+ "learning_rate": 2.2658530114088618e-05,
1156
+ "loss": 0.1364,
1157
+ "step": 2482
1158
+ },
1159
+ {
1160
+ "epoch": 7.160458452722063,
1161
+ "grad_norm": 0.7672788500785828,
1162
+ "learning_rate": 2.2433006102414432e-05,
1163
+ "loss": 0.072,
1164
+ "step": 2499
1165
+ },
1166
+ {
1167
+ "epoch": 7.2091690544412605,
1168
+ "grad_norm": 8.377331733703613,
1169
+ "learning_rate": 2.220748209074025e-05,
1170
+ "loss": 0.2638,
1171
+ "step": 2516
1172
+ },
1173
+ {
1174
+ "epoch": 7.257879656160458,
1175
+ "grad_norm": 9.670488357543945,
1176
+ "learning_rate": 2.1981958079066066e-05,
1177
+ "loss": 0.2495,
1178
+ "step": 2533
1179
+ },
1180
+ {
1181
+ "epoch": 7.306590257879656,
1182
+ "grad_norm": 0.24363534152507782,
1183
+ "learning_rate": 2.1756434067391883e-05,
1184
+ "loss": 0.2038,
1185
+ "step": 2550
1186
+ },
1187
+ {
1188
+ "epoch": 7.355300859598854,
1189
+ "grad_norm": 2.2357654571533203,
1190
+ "learning_rate": 2.15309100557177e-05,
1191
+ "loss": 0.2934,
1192
+ "step": 2567
1193
+ },
1194
+ {
1195
+ "epoch": 7.404011461318052,
1196
+ "grad_norm": 0.20546384155750275,
1197
+ "learning_rate": 2.1305386044043514e-05,
1198
+ "loss": 0.1834,
1199
+ "step": 2584
1200
+ },
1201
+ {
1202
+ "epoch": 7.45272206303725,
1203
+ "grad_norm": 0.32598844170570374,
1204
+ "learning_rate": 2.107986203236933e-05,
1205
+ "loss": 0.0821,
1206
+ "step": 2601
1207
+ },
1208
+ {
1209
+ "epoch": 7.501432664756447,
1210
+ "grad_norm": 8.553650856018066,
1211
+ "learning_rate": 2.0854338020695148e-05,
1212
+ "loss": 0.0992,
1213
+ "step": 2618
1214
+ },
1215
+ {
1216
+ "epoch": 7.5501432664756445,
1217
+ "grad_norm": 0.3119734525680542,
1218
+ "learning_rate": 2.062881400902096e-05,
1219
+ "loss": 0.2344,
1220
+ "step": 2635
1221
+ },
1222
+ {
1223
+ "epoch": 7.598853868194842,
1224
+ "grad_norm": 6.1670002937316895,
1225
+ "learning_rate": 2.040328999734678e-05,
1226
+ "loss": 0.1058,
1227
+ "step": 2652
1228
+ },
1229
+ {
1230
+ "epoch": 7.64756446991404,
1231
+ "grad_norm": 2.705218553543091,
1232
+ "learning_rate": 2.0177765985672592e-05,
1233
+ "loss": 0.1608,
1234
+ "step": 2669
1235
+ },
1236
+ {
1237
+ "epoch": 7.696275071633238,
1238
+ "grad_norm": 5.938003063201904,
1239
+ "learning_rate": 1.995224197399841e-05,
1240
+ "loss": 0.1554,
1241
+ "step": 2686
1242
+ },
1243
+ {
1244
+ "epoch": 7.744985673352436,
1245
+ "grad_norm": 0.41698479652404785,
1246
+ "learning_rate": 1.9726717962324227e-05,
1247
+ "loss": 0.0979,
1248
+ "step": 2703
1249
+ },
1250
+ {
1251
+ "epoch": 7.793696275071634,
1252
+ "grad_norm": 0.4503624141216278,
1253
+ "learning_rate": 1.950119395065004e-05,
1254
+ "loss": 0.1353,
1255
+ "step": 2720
1256
+ },
1257
+ {
1258
+ "epoch": 7.842406876790831,
1259
+ "grad_norm": 4.662674427032471,
1260
+ "learning_rate": 1.9275669938975857e-05,
1261
+ "loss": 0.1295,
1262
+ "step": 2737
1263
+ },
1264
+ {
1265
+ "epoch": 7.891117478510028,
1266
+ "grad_norm": 5.666357517242432,
1267
+ "learning_rate": 1.905014592730167e-05,
1268
+ "loss": 0.2144,
1269
+ "step": 2754
1270
+ },
1271
+ {
1272
+ "epoch": 7.939828080229226,
1273
+ "grad_norm": 0.6394052505493164,
1274
+ "learning_rate": 1.8824621915627488e-05,
1275
+ "loss": 0.178,
1276
+ "step": 2771
1277
+ },
1278
+ {
1279
+ "epoch": 7.988538681948424,
1280
+ "grad_norm": 9.961098670959473,
1281
+ "learning_rate": 1.8599097903953305e-05,
1282
+ "loss": 0.2561,
1283
+ "step": 2788
1284
+ },
1285
+ {
1286
+ "epoch": 8.0,
1287
+ "eval_accuracy": 0.9956958393113343,
1288
+ "eval_f1_macro": 0.9951431111442676,
1289
+ "eval_f1_micro": 0.9956958393113343,
1290
+ "eval_f1_weighted": 0.9956405011600654,
1291
+ "eval_loss": 0.023447172716259956,
1292
+ "eval_precision_macro": 0.995983935742972,
1293
+ "eval_precision_micro": 0.9956958393113343,
1294
+ "eval_precision_weighted": 0.9964929061055317,
1295
+ "eval_recall_macro": 0.9953528399311532,
1296
+ "eval_recall_micro": 0.9956958393113343,
1297
+ "eval_recall_weighted": 0.9956958393113343,
1298
+ "eval_runtime": 3.3596,
1299
+ "eval_samples_per_second": 207.466,
1300
+ "eval_steps_per_second": 13.097,
1301
+ "step": 2792
1302
+ },
1303
+ {
1304
+ "epoch": 8.037249283667622,
1305
+ "grad_norm": 10.93313217163086,
1306
+ "learning_rate": 1.837357389227912e-05,
1307
+ "loss": 0.11,
1308
+ "step": 2805
1309
+ },
1310
+ {
1311
+ "epoch": 8.085959885386819,
1312
+ "grad_norm": 0.05085707828402519,
1313
+ "learning_rate": 1.8148049880604936e-05,
1314
+ "loss": 0.0616,
1315
+ "step": 2822
1316
+ },
1317
+ {
1318
+ "epoch": 8.134670487106018,
1319
+ "grad_norm": 10.42803955078125,
1320
+ "learning_rate": 1.792252586893075e-05,
1321
+ "loss": 0.0648,
1322
+ "step": 2839
1323
+ },
1324
+ {
1325
+ "epoch": 8.183381088825215,
1326
+ "grad_norm": 1.379164457321167,
1327
+ "learning_rate": 1.7697001857256567e-05,
1328
+ "loss": 0.1706,
1329
+ "step": 2856
1330
+ },
1331
+ {
1332
+ "epoch": 8.232091690544413,
1333
+ "grad_norm": 8.783364295959473,
1334
+ "learning_rate": 1.7471477845582384e-05,
1335
+ "loss": 0.1954,
1336
+ "step": 2873
1337
+ },
1338
+ {
1339
+ "epoch": 8.28080229226361,
1340
+ "grad_norm": 1.5522698163986206,
1341
+ "learning_rate": 1.7245953833908197e-05,
1342
+ "loss": 0.2134,
1343
+ "step": 2890
1344
+ },
1345
+ {
1346
+ "epoch": 8.329512893982809,
1347
+ "grad_norm": 6.784268379211426,
1348
+ "learning_rate": 1.7020429822234015e-05,
1349
+ "loss": 0.1928,
1350
+ "step": 2907
1351
+ },
1352
+ {
1353
+ "epoch": 8.378223495702006,
1354
+ "grad_norm": 3.0361063480377197,
1355
+ "learning_rate": 1.679490581055983e-05,
1356
+ "loss": 0.3187,
1357
+ "step": 2924
1358
+ },
1359
+ {
1360
+ "epoch": 8.426934097421203,
1361
+ "grad_norm": 1.8513216972351074,
1362
+ "learning_rate": 1.6569381798885645e-05,
1363
+ "loss": 0.1319,
1364
+ "step": 2941
1365
+ },
1366
+ {
1367
+ "epoch": 8.475644699140402,
1368
+ "grad_norm": 0.5567758083343506,
1369
+ "learning_rate": 1.6343857787211462e-05,
1370
+ "loss": 0.136,
1371
+ "step": 2958
1372
+ },
1373
+ {
1374
+ "epoch": 8.524355300859598,
1375
+ "grad_norm": 2.810915231704712,
1376
+ "learning_rate": 1.611833377553728e-05,
1377
+ "loss": 0.0858,
1378
+ "step": 2975
1379
+ },
1380
+ {
1381
+ "epoch": 8.573065902578797,
1382
+ "grad_norm": 1.9855493307113647,
1383
+ "learning_rate": 1.5892809763863097e-05,
1384
+ "loss": 0.1732,
1385
+ "step": 2992
1386
+ },
1387
+ {
1388
+ "epoch": 8.621776504297994,
1389
+ "grad_norm": 0.1735798567533493,
1390
+ "learning_rate": 1.566728575218891e-05,
1391
+ "loss": 0.0911,
1392
+ "step": 3009
1393
+ },
1394
+ {
1395
+ "epoch": 8.670487106017191,
1396
+ "grad_norm": 0.19329993426799774,
1397
+ "learning_rate": 1.5441761740514727e-05,
1398
+ "loss": 0.1024,
1399
+ "step": 3026
1400
+ },
1401
+ {
1402
+ "epoch": 8.71919770773639,
1403
+ "grad_norm": 4.43624210357666,
1404
+ "learning_rate": 1.5216237728840543e-05,
1405
+ "loss": 0.1408,
1406
+ "step": 3043
1407
+ },
1408
+ {
1409
+ "epoch": 8.767908309455587,
1410
+ "grad_norm": 9.911310195922852,
1411
+ "learning_rate": 1.4990713717166358e-05,
1412
+ "loss": 0.1626,
1413
+ "step": 3060
1414
+ },
1415
+ {
1416
+ "epoch": 8.816618911174785,
1417
+ "grad_norm": 1.323052167892456,
1418
+ "learning_rate": 1.4765189705492175e-05,
1419
+ "loss": 0.1222,
1420
+ "step": 3077
1421
+ },
1422
+ {
1423
+ "epoch": 8.865329512893982,
1424
+ "grad_norm": 11.561975479125977,
1425
+ "learning_rate": 1.4539665693817989e-05,
1426
+ "loss": 0.1769,
1427
+ "step": 3094
1428
+ },
1429
+ {
1430
+ "epoch": 8.914040114613181,
1431
+ "grad_norm": 0.08104487508535385,
1432
+ "learning_rate": 1.4314141682143806e-05,
1433
+ "loss": 0.0953,
1434
+ "step": 3111
1435
+ },
1436
+ {
1437
+ "epoch": 8.962750716332378,
1438
+ "grad_norm": 0.04927730932831764,
1439
+ "learning_rate": 1.408861767046962e-05,
1440
+ "loss": 0.0574,
1441
+ "step": 3128
1442
+ },
1443
+ {
1444
+ "epoch": 9.0,
1445
+ "eval_accuracy": 0.9956958393113343,
1446
+ "eval_f1_macro": 0.9948504424939576,
1447
+ "eval_f1_micro": 0.9956958393113343,
1448
+ "eval_f1_weighted": 0.9956230754082893,
1449
+ "eval_loss": 0.017519734799861908,
1450
+ "eval_precision_macro": 0.995983935742972,
1451
+ "eval_precision_micro": 0.9956958393113343,
1452
+ "eval_precision_weighted": 0.9964929061055317,
1453
+ "eval_recall_macro": 0.9948364888123926,
1454
+ "eval_recall_micro": 0.9956958393113343,
1455
+ "eval_recall_weighted": 0.9956958393113343,
1456
+ "eval_runtime": 3.4072,
1457
+ "eval_samples_per_second": 204.565,
1458
+ "eval_steps_per_second": 12.914,
1459
+ "step": 3141
1460
+ },
1461
+ {
1462
+ "epoch": 9.011461318051577,
1463
+ "grad_norm": 0.3880198001861572,
1464
+ "learning_rate": 1.3863093658795437e-05,
1465
+ "loss": 0.1954,
1466
+ "step": 3145
1467
+ },
1468
+ {
1469
+ "epoch": 9.060171919770774,
1470
+ "grad_norm": 0.05076654255390167,
1471
+ "learning_rate": 1.3637569647121254e-05,
1472
+ "loss": 0.0797,
1473
+ "step": 3162
1474
+ },
1475
+ {
1476
+ "epoch": 9.10888252148997,
1477
+ "grad_norm": 6.032546043395996,
1478
+ "learning_rate": 1.3412045635447068e-05,
1479
+ "loss": 0.2393,
1480
+ "step": 3179
1481
+ },
1482
+ {
1483
+ "epoch": 9.15759312320917,
1484
+ "grad_norm": 11.056164741516113,
1485
+ "learning_rate": 1.3186521623772885e-05,
1486
+ "loss": 0.1001,
1487
+ "step": 3196
1488
+ },
1489
+ {
1490
+ "epoch": 9.206303724928366,
1491
+ "grad_norm": 0.19840994477272034,
1492
+ "learning_rate": 1.29609976120987e-05,
1493
+ "loss": 0.0743,
1494
+ "step": 3213
1495
+ },
1496
+ {
1497
+ "epoch": 9.255014326647565,
1498
+ "grad_norm": 4.645060062408447,
1499
+ "learning_rate": 1.2735473600424515e-05,
1500
+ "loss": 0.1446,
1501
+ "step": 3230
1502
+ },
1503
+ {
1504
+ "epoch": 9.303724928366762,
1505
+ "grad_norm": 9.013117790222168,
1506
+ "learning_rate": 1.2509949588750332e-05,
1507
+ "loss": 0.1288,
1508
+ "step": 3247
1509
+ },
1510
+ {
1511
+ "epoch": 9.35243553008596,
1512
+ "grad_norm": 1.7711181640625,
1513
+ "learning_rate": 1.228442557707615e-05,
1514
+ "loss": 0.0835,
1515
+ "step": 3264
1516
+ },
1517
+ {
1518
+ "epoch": 9.401146131805158,
1519
+ "grad_norm": 5.3366379737854,
1520
+ "learning_rate": 1.2058901565401965e-05,
1521
+ "loss": 0.1201,
1522
+ "step": 3281
1523
+ },
1524
+ {
1525
+ "epoch": 9.449856733524355,
1526
+ "grad_norm": 3.4900286197662354,
1527
+ "learning_rate": 1.183337755372778e-05,
1528
+ "loss": 0.1026,
1529
+ "step": 3298
1530
+ },
1531
+ {
1532
+ "epoch": 9.498567335243553,
1533
+ "grad_norm": 0.059569913893938065,
1534
+ "learning_rate": 1.1607853542053596e-05,
1535
+ "loss": 0.1123,
1536
+ "step": 3315
1537
+ },
1538
+ {
1539
+ "epoch": 9.54727793696275,
1540
+ "grad_norm": 8.251703262329102,
1541
+ "learning_rate": 1.1382329530379411e-05,
1542
+ "loss": 0.1441,
1543
+ "step": 3332
1544
+ },
1545
+ {
1546
+ "epoch": 9.595988538681949,
1547
+ "grad_norm": 1.078260064125061,
1548
+ "learning_rate": 1.1156805518705226e-05,
1549
+ "loss": 0.1191,
1550
+ "step": 3349
1551
+ },
1552
+ {
1553
+ "epoch": 9.644699140401146,
1554
+ "grad_norm": 7.364470958709717,
1555
+ "learning_rate": 1.0931281507031044e-05,
1556
+ "loss": 0.1539,
1557
+ "step": 3366
1558
+ },
1559
+ {
1560
+ "epoch": 9.693409742120345,
1561
+ "grad_norm": 2.354499101638794,
1562
+ "learning_rate": 1.0705757495356859e-05,
1563
+ "loss": 0.1502,
1564
+ "step": 3383
1565
+ },
1566
+ {
1567
+ "epoch": 9.742120343839542,
1568
+ "grad_norm": 10.193525314331055,
1569
+ "learning_rate": 1.0480233483682674e-05,
1570
+ "loss": 0.1199,
1571
+ "step": 3400
1572
+ },
1573
+ {
1574
+ "epoch": 9.790830945558739,
1575
+ "grad_norm": 2.536367893218994,
1576
+ "learning_rate": 1.025470947200849e-05,
1577
+ "loss": 0.2098,
1578
+ "step": 3417
1579
+ },
1580
+ {
1581
+ "epoch": 9.839541547277937,
1582
+ "grad_norm": 5.09243631362915,
1583
+ "learning_rate": 1.0029185460334307e-05,
1584
+ "loss": 0.2896,
1585
+ "step": 3434
1586
+ },
1587
+ {
1588
+ "epoch": 9.888252148997134,
1589
+ "grad_norm": 1.098929762840271,
1590
+ "learning_rate": 9.803661448660124e-06,
1591
+ "loss": 0.1017,
1592
+ "step": 3451
1593
+ },
1594
+ {
1595
+ "epoch": 9.936962750716333,
1596
+ "grad_norm": 0.1535651981830597,
1597
+ "learning_rate": 9.57813743698594e-06,
1598
+ "loss": 0.1077,
1599
+ "step": 3468
1600
+ },
1601
+ {
1602
+ "epoch": 9.98567335243553,
1603
+ "grad_norm": 0.5236210227012634,
1604
+ "learning_rate": 9.352613425311755e-06,
1605
+ "loss": 0.084,
1606
+ "step": 3485
1607
+ },
1608
+ {
1609
+ "epoch": 10.0,
1610
+ "eval_accuracy": 0.9956958393113343,
1611
+ "eval_f1_macro": 0.9949831411206324,
1612
+ "eval_f1_micro": 0.9956958393113343,
1613
+ "eval_f1_weighted": 0.9955771743939521,
1614
+ "eval_loss": 0.015395666472613811,
1615
+ "eval_precision_macro": 0.9964707314104905,
1616
+ "eval_precision_micro": 0.9956958393113343,
1617
+ "eval_precision_weighted": 0.9963769691172847,
1618
+ "eval_recall_macro": 0.9945496270797476,
1619
+ "eval_recall_micro": 0.9956958393113343,
1620
+ "eval_recall_weighted": 0.9956958393113343,
1621
+ "eval_runtime": 3.4096,
1622
+ "eval_samples_per_second": 204.424,
1623
+ "eval_steps_per_second": 12.905,
1624
+ "step": 3490
1625
+ },
1626
+ {
1627
+ "epoch": 10.034383954154729,
1628
+ "grad_norm": 0.10064805299043655,
1629
+ "learning_rate": 9.12708941363757e-06,
1630
+ "loss": 0.129,
1631
+ "step": 3502
1632
+ },
1633
+ {
1634
+ "epoch": 10.083094555873926,
1635
+ "grad_norm": 0.9957130551338196,
1636
+ "learning_rate": 8.901565401963385e-06,
1637
+ "loss": 0.0652,
1638
+ "step": 3519
1639
+ },
1640
+ {
1641
+ "epoch": 10.131805157593123,
1642
+ "grad_norm": 4.763299465179443,
1643
+ "learning_rate": 8.676041390289202e-06,
1644
+ "loss": 0.1531,
1645
+ "step": 3536
1646
+ },
1647
+ {
1648
+ "epoch": 10.180515759312321,
1649
+ "grad_norm": 0.9550924897193909,
1650
+ "learning_rate": 8.450517378615018e-06,
1651
+ "loss": 0.0548,
1652
+ "step": 3553
1653
+ },
1654
+ {
1655
+ "epoch": 10.229226361031518,
1656
+ "grad_norm": 0.5510568022727966,
1657
+ "learning_rate": 8.224993366940833e-06,
1658
+ "loss": 0.171,
1659
+ "step": 3570
1660
+ },
1661
+ {
1662
+ "epoch": 10.277936962750717,
1663
+ "grad_norm": 1.120082139968872,
1664
+ "learning_rate": 7.999469355266649e-06,
1665
+ "loss": 0.1506,
1666
+ "step": 3587
1667
+ },
1668
+ {
1669
+ "epoch": 10.326647564469914,
1670
+ "grad_norm": 8.000428199768066,
1671
+ "learning_rate": 7.773945343592464e-06,
1672
+ "loss": 0.0791,
1673
+ "step": 3604
1674
+ },
1675
+ {
1676
+ "epoch": 10.375358166189113,
1677
+ "grad_norm": 0.08897445350885391,
1678
+ "learning_rate": 7.548421331918282e-06,
1679
+ "loss": 0.1006,
1680
+ "step": 3621
1681
+ },
1682
+ {
1683
+ "epoch": 10.42406876790831,
1684
+ "grad_norm": 1.5076502561569214,
1685
+ "learning_rate": 7.322897320244097e-06,
1686
+ "loss": 0.1085,
1687
+ "step": 3638
1688
+ },
1689
+ {
1690
+ "epoch": 10.472779369627506,
1691
+ "grad_norm": 0.3444303870201111,
1692
+ "learning_rate": 7.097373308569913e-06,
1693
+ "loss": 0.0881,
1694
+ "step": 3655
1695
+ },
1696
+ {
1697
+ "epoch": 10.521489971346705,
1698
+ "grad_norm": 5.353268146514893,
1699
+ "learning_rate": 6.871849296895728e-06,
1700
+ "loss": 0.1233,
1701
+ "step": 3672
1702
+ },
1703
+ {
1704
+ "epoch": 10.570200573065902,
1705
+ "grad_norm": 6.925529479980469,
1706
+ "learning_rate": 6.646325285221544e-06,
1707
+ "loss": 0.1726,
1708
+ "step": 3689
1709
+ },
1710
+ {
1711
+ "epoch": 10.6189111747851,
1712
+ "grad_norm": 1.2398282289505005,
1713
+ "learning_rate": 6.42080127354736e-06,
1714
+ "loss": 0.1607,
1715
+ "step": 3706
1716
+ },
1717
+ {
1718
+ "epoch": 10.667621776504298,
1719
+ "grad_norm": 0.17667262256145477,
1720
+ "learning_rate": 6.195277261873176e-06,
1721
+ "loss": 0.1065,
1722
+ "step": 3723
1723
+ },
1724
+ {
1725
+ "epoch": 10.716332378223496,
1726
+ "grad_norm": 8.593550682067871,
1727
+ "learning_rate": 5.969753250198992e-06,
1728
+ "loss": 0.1598,
1729
+ "step": 3740
1730
+ },
1731
+ {
1732
+ "epoch": 10.765042979942693,
1733
+ "grad_norm": 6.634376049041748,
1734
+ "learning_rate": 5.744229238524808e-06,
1735
+ "loss": 0.214,
1736
+ "step": 3757
1737
+ },
1738
+ {
1739
+ "epoch": 10.81375358166189,
1740
+ "grad_norm": 0.12865765392780304,
1741
+ "learning_rate": 5.518705226850624e-06,
1742
+ "loss": 0.1079,
1743
+ "step": 3774
1744
+ },
1745
+ {
1746
+ "epoch": 10.862464183381089,
1747
+ "grad_norm": 0.4588039815425873,
1748
+ "learning_rate": 5.29318121517644e-06,
1749
+ "loss": 0.1218,
1750
+ "step": 3791
1751
+ },
1752
+ {
1753
+ "epoch": 10.911174785100286,
1754
+ "grad_norm": 8.046585083007812,
1755
+ "learning_rate": 5.0676572035022555e-06,
1756
+ "loss": 0.1618,
1757
+ "step": 3808
1758
+ },
1759
+ {
1760
+ "epoch": 10.959885386819485,
1761
+ "grad_norm": 0.34073105454444885,
1762
+ "learning_rate": 4.842133191828072e-06,
1763
+ "loss": 0.0788,
1764
+ "step": 3825
1765
+ },
1766
+ {
1767
+ "epoch": 11.0,
1768
+ "eval_accuracy": 1.0,
1769
+ "eval_f1_macro": 1.0,
1770
+ "eval_f1_micro": 1.0,
1771
+ "eval_f1_weighted": 1.0,
1772
+ "eval_loss": 0.011031342670321465,
1773
+ "eval_precision_macro": 1.0,
1774
+ "eval_precision_micro": 1.0,
1775
+ "eval_precision_weighted": 1.0,
1776
+ "eval_recall_macro": 1.0,
1777
+ "eval_recall_micro": 1.0,
1778
+ "eval_recall_weighted": 1.0,
1779
+ "eval_runtime": 3.4121,
1780
+ "eval_samples_per_second": 204.275,
1781
+ "eval_steps_per_second": 12.895,
1782
+ "step": 3839
1783
+ }
1784
+ ],
1785
+ "logging_steps": 17,
1786
+ "max_steps": 4188,
1787
+ "num_input_tokens_seen": 0,
1788
+ "num_train_epochs": 12,
1789
+ "save_steps": 500,
1790
+ "stateful_callbacks": {
1791
+ "EarlyStoppingCallback": {
1792
+ "args": {
1793
+ "early_stopping_patience": 5,
1794
+ "early_stopping_threshold": 0.01
1795
+ },
1796
+ "attributes": {
1797
+ "early_stopping_patience_counter": 0
1798
+ }
1799
+ },
1800
+ "TrainerControl": {
1801
+ "args": {
1802
+ "should_epoch_stop": false,
1803
+ "should_evaluate": false,
1804
+ "should_log": false,
1805
+ "should_save": true,
1806
+ "should_training_stop": true
1807
+ },
1808
+ "attributes": {}
1809
+ }
1810
+ },
1811
+ "total_flos": 2.3756905482586214e+18,
1812
+ "train_batch_size": 8,
1813
+ "trial_name": null,
1814
+ "trial_params": null
1815
+ }
checkpoint-3839/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38ad25aa8e094ec9f61233a3f11b558808fa1d76c3288f7c419d449e0139a38
3
+ size 5176
config.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224",
3
+ "_num_labels": 83,
4
+ "architectures": [
5
+ "ViTForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "encoder_stride": 16,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.0,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Agnes Skinner",
14
+ "1": "Alec Baldwin",
15
+ "2": "Alex Whitney",
16
+ "3": "Allison Taylor",
17
+ "4": "Apu Nahasapeemapetilon",
18
+ "5": "Artie Ziff",
19
+ "6": "Ashley Grant",
20
+ "7": "Ballet Teacher",
21
+ "8": "Barney Gumble",
22
+ "9": "Bart Simpson",
23
+ "10": "Bernice Hibbert",
24
+ "11": "Carl Carlson",
25
+ "12": "Chief Wiggum",
26
+ "13": "Cletus Spuckler",
27
+ "14": "Comic Book Guy",
28
+ "15": "Dewey Largo",
29
+ "16": "Doctor Hibbert",
30
+ "17": "Doctor Nick",
31
+ "18": "Dolph Starbeam",
32
+ "19": "Drederick Tatum",
33
+ "20": "Edna Krabappel",
34
+ "21": "Fat Tony",
35
+ "22": "Focus Group Manager",
36
+ "23": "Gil Gunderson",
37
+ "24": "Grandpa Simpson",
38
+ "25": "Groundskeeper Willie",
39
+ "26": "Hank Scorpio",
40
+ "27": "Hans Moleman",
41
+ "28": "Helen Lovejoy",
42
+ "29": "Homer Simpson",
43
+ "30": "Jailbird Snake",
44
+ "31": "Janey Powell",
45
+ "32": "Jasper Beardsley",
46
+ "33": "Jessica Lovejoy",
47
+ "34": "Jimbo Jones",
48
+ "35": "Kearney Zzyzwicz",
49
+ "36": "Kent Brockman",
50
+ "37": "Kim Basinger",
51
+ "38": "Kirk Van Houten",
52
+ "39": "Krusty the Clown",
53
+ "40": "Larry Burns",
54
+ "41": "Laura Powers",
55
+ "42": "Lenny Leonard",
56
+ "43": "Lewis Clark",
57
+ "44": "Lindsey Naegle",
58
+ "45": "Lionel Hutz",
59
+ "46": "Lisa Simpson",
60
+ "47": "Luann Van Houten",
61
+ "48": "Maggie Simpson",
62
+ "49": "Manjula Nahasapeemapetilon",
63
+ "50": "Marge Simpson",
64
+ "51": "Martin Prince",
65
+ "52": "Maude Flanders",
66
+ "53": "Mayor Quimby",
67
+ "54": "Milhouse Van Houten",
68
+ "55": "Mindy Simmons",
69
+ "56": "Miss Hoover",
70
+ "57": "Moe Szyslak",
71
+ "58": "Mr Burns",
72
+ "59": "Ned Flanders",
73
+ "60": "Nelson Muntz",
74
+ "61": "Nerd Database",
75
+ "62": "Officer Eddie",
76
+ "63": "Officer Lou",
77
+ "64": "Otto Mann",
78
+ "65": "Patty Bouvier",
79
+ "66": "Principal Skinner",
80
+ "67": "Professor Frink",
81
+ "68": "Rainier Wolfcastle",
82
+ "69": "Ralph Wiggum",
83
+ "70": "Reverend Lovejoy",
84
+ "71": "Rod Flanders",
85
+ "72": "Ruth Powers",
86
+ "73": "Sea Captian",
87
+ "74": "Selma Bouvier",
88
+ "75": "Sherri and Terri",
89
+ "76": "Sideshow Bob",
90
+ "77": "Sideshow Mel",
91
+ "78": "Superintendent Chalmers",
92
+ "79": "Todd Flanders",
93
+ "80": "Troy McClure",
94
+ "81": "Waylon Smithers",
95
+ "82": "Wendell Borton"
96
+ },
97
+ "image_size": 224,
98
+ "initializer_range": 0.02,
99
+ "intermediate_size": 3072,
100
+ "label2id": {
101
+ "Agnes Skinner": 0,
102
+ "Alec Baldwin": 1,
103
+ "Alex Whitney": 2,
104
+ "Allison Taylor": 3,
105
+ "Apu Nahasapeemapetilon": 4,
106
+ "Artie Ziff": 5,
107
+ "Ashley Grant": 6,
108
+ "Ballet Teacher": 7,
109
+ "Barney Gumble": 8,
110
+ "Bart Simpson": 9,
111
+ "Bernice Hibbert": 10,
112
+ "Carl Carlson": 11,
113
+ "Chief Wiggum": 12,
114
+ "Cletus Spuckler": 13,
115
+ "Comic Book Guy": 14,
116
+ "Dewey Largo": 15,
117
+ "Doctor Hibbert": 16,
118
+ "Doctor Nick": 17,
119
+ "Dolph Starbeam": 18,
120
+ "Drederick Tatum": 19,
121
+ "Edna Krabappel": 20,
122
+ "Fat Tony": 21,
123
+ "Focus Group Manager": 22,
124
+ "Gil Gunderson": 23,
125
+ "Grandpa Simpson": 24,
126
+ "Groundskeeper Willie": 25,
127
+ "Hank Scorpio": 26,
128
+ "Hans Moleman": 27,
129
+ "Helen Lovejoy": 28,
130
+ "Homer Simpson": 29,
131
+ "Jailbird Snake": 30,
132
+ "Janey Powell": 31,
133
+ "Jasper Beardsley": 32,
134
+ "Jessica Lovejoy": 33,
135
+ "Jimbo Jones": 34,
136
+ "Kearney Zzyzwicz": 35,
137
+ "Kent Brockman": 36,
138
+ "Kim Basinger": 37,
139
+ "Kirk Van Houten": 38,
140
+ "Krusty the Clown": 39,
141
+ "Larry Burns": 40,
142
+ "Laura Powers": 41,
143
+ "Lenny Leonard": 42,
144
+ "Lewis Clark": 43,
145
+ "Lindsey Naegle": 44,
146
+ "Lionel Hutz": 45,
147
+ "Lisa Simpson": 46,
148
+ "Luann Van Houten": 47,
149
+ "Maggie Simpson": 48,
150
+ "Manjula Nahasapeemapetilon": 49,
151
+ "Marge Simpson": 50,
152
+ "Martin Prince": 51,
153
+ "Maude Flanders": 52,
154
+ "Mayor Quimby": 53,
155
+ "Milhouse Van Houten": 54,
156
+ "Mindy Simmons": 55,
157
+ "Miss Hoover": 56,
158
+ "Moe Szyslak": 57,
159
+ "Mr Burns": 58,
160
+ "Ned Flanders": 59,
161
+ "Nelson Muntz": 60,
162
+ "Nerd Database": 61,
163
+ "Officer Eddie": 62,
164
+ "Officer Lou": 63,
165
+ "Otto Mann": 64,
166
+ "Patty Bouvier": 65,
167
+ "Principal Skinner": 66,
168
+ "Professor Frink": 67,
169
+ "Rainier Wolfcastle": 68,
170
+ "Ralph Wiggum": 69,
171
+ "Reverend Lovejoy": 70,
172
+ "Rod Flanders": 71,
173
+ "Ruth Powers": 72,
174
+ "Sea Captian": 73,
175
+ "Selma Bouvier": 74,
176
+ "Sherri and Terri": 75,
177
+ "Sideshow Bob": 76,
178
+ "Sideshow Mel": 77,
179
+ "Superintendent Chalmers": 78,
180
+ "Todd Flanders": 79,
181
+ "Troy McClure": 80,
182
+ "Waylon Smithers": 81,
183
+ "Wendell Borton": 82
184
+ },
185
+ "layer_norm_eps": 1e-12,
186
+ "model_type": "vit",
187
+ "num_attention_heads": 12,
188
+ "num_channels": 3,
189
+ "num_hidden_layers": 12,
190
+ "patch_size": 16,
191
+ "problem_type": "single_label_classification",
192
+ "qkv_bias": true,
193
+ "torch_dtype": "float32",
194
+ "transformers_version": "4.41.0"
195
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68313e8f772280708ca9f54261e838fd8237b2d20f4e82079e5e2b5ef3bed02f
3
+ size 343473140
preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.5,
27
+ 0.5,
28
+ 0.5
29
+ ],
30
+ "resample": 2,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
runs/May28_16-03-03_r-rileybol-simpsons-classifier-pod0heps-7433c-xsamv/events.out.tfevents.1716912184.r-rileybol-simpsons-classifier-pod0heps-7433c-xsamv.159.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c0b256482885d64a0b0f94abf8c27f762ed9de84b3abbd4e4ea0314aa2f075e
3
- size 9198
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418e0a7a6447f24ccc2c7201f6742127bb6a48564540bfdf87de1d6ddce6a7fb
3
+ size 66162
runs/May28_16-03-03_r-rileybol-simpsons-classifier-pod0heps-7433c-xsamv/events.out.tfevents.1716912712.r-rileybol-simpsons-classifier-pod0heps-7433c-xsamv.159.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e8c27d8b4d688dcf19d563085f40ec0c1a31bd953dabfdcd4647b729fbb46a
3
+ size 921
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38ad25aa8e094ec9f61233a3f11b558808fa1d76c3288f7c419d449e0139a38
3
+ size 5176
training_params.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-mb2mv-qdf75/autotrain-data",
3
+ "model": "google/vit-base-patch16-224",
4
+ "username": "rileybol",
5
+ "lr": 5e-05,
6
+ "epochs": 12,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "logging_steps": -1,
18
+ "project_name": "autotrain-mb2mv-qdf75",
19
+ "auto_find_batch_size": false,
20
+ "mixed_precision": "fp16",
21
+ "save_total_limit": 1,
22
+ "push_to_hub": true,
23
+ "evaluation_strategy": "epoch",
24
+ "image_column": "autotrain_image",
25
+ "target_column": "autotrain_label",
26
+ "log": "tensorboard",
27
+ "early_stopping_patience": 5,
28
+ "early_stopping_threshold": 0.01
29
+ }