truongson412 committed
Commit: cdc6019
Parent(s): a7bc690
Training in progress, epoch 0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d499fddb89fc0ccb44379fe6ff797442703d79e38c6f891c1702d3d1a17b5aa0
size 347498816
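model.safetensors is tracked with Git LFS, so the diff above only touches the pointer file: a version line, the blob's oid sha256 digest, and its size in bytes (347498816, roughly 347 MB); the weights themselves live in LFS storage. As a minimal sketch (standard library only, local file names are illustrative), a downloaded blob can be checked against its pointer like this:

import hashlib

def parse_lfs_pointer(text):
    # Each pointer line is "key value"; collect them into a dict.
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def sha256_of(path):
    # Stream the file so large blobs do not have to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

pointer = parse_lfs_pointer(open("model.safetensors.pointer").read())  # illustrative path
expected = pointer["oid"].removeprefix("sha256:")
assert sha256_of("model.safetensors") == expected, "checksum mismatch"
print(pointer["size"], "bytes, oid verified")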
runs/Oct30_13-07-11_101dbfee8143/events.out.tfevents.1730295902.101dbfee8143.30.5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3ac3d537818b6a624c2301b26fafce02badc8fa6ddf322fa97d2ef2c3eccb66
+size 405
runs/Oct30_13-45-02_101dbfee8143/events.out.tfevents.1730295908.101dbfee8143.30.6 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:300f755dda9d7a65dcdcebdcab9b665298f3aecd596d339cd6ba65138b43bc48
+size 7043
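The two events.out.tfevents.* files are TensorBoard logs written by the Trainer's reporting callback during this run; like the weights, only their LFS pointers appear in the diff. A sketch of reading the scalar curves back out, assuming the tensorboard package is installed and the run directory has been pulled locally:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at a run directory containing an events.out.tfevents.* file.
acc = EventAccumulator("runs/Oct30_13-45-02_101dbfee8143")
acc.Reload()                             # parse the event file
print(acc.Tags()["scalars"])             # list the available scalar tags
for event in acc.Scalars("train/loss"):  # exact tag names depend on the Trainer version
    print(event.step, event.value)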
trainer_state.json CHANGED
@@ -1,303 +1,412 @@
{
-  "best_metric": 0.
-  "best_model_checkpoint": "microsoft/swin-base-patch4-window7-224-finetuned-dsc/checkpoint-
-  "epoch":
+  "best_metric": 0.6374650512581547,
+  "best_model_checkpoint": "microsoft/swin-base-patch4-window7-224-finetuned-dsc/checkpoint-336",
+  "epoch": 6.973977695167286,
  "eval_steps": 500,
-  "global_step":
+  "global_step": 469,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14869888475836432,
-      "grad_norm":
-      "learning_rate": 2.
-      "loss": 0.
+      "grad_norm": 4.006267070770264,
+      "learning_rate": 2.127659574468085e-07,
+      "loss": 0.6718,
      "step": 10
    },
    {
      "epoch": 0.29739776951672864,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.247750759124756,
+      "learning_rate": 4.25531914893617e-07,
+      "loss": 0.664,
      "step": 20
    },
    {
      "epoch": 0.44609665427509293,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.412334680557251,
+      "learning_rate": 6.382978723404255e-07,
+      "loss": 0.6556,
      "step": 30
    },
    {
      "epoch": 0.5947955390334573,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.8701229095458984,
+      "learning_rate": 8.51063829787234e-07,
+      "loss": 0.6526,
      "step": 40
    },
    {
      "epoch": 0.7434944237918215,
-      "grad_norm":
-      "learning_rate": 9.
-      "loss": 0.
+      "grad_norm": 3.543933391571045,
+      "learning_rate": 9.928909952606635e-07,
+      "loss": 0.6506,
      "step": 50
    },
    {
      "epoch": 0.8921933085501859,
-      "grad_norm":
-      "learning_rate": 9.
-      "loss": 0.
+      "grad_norm": 3.1438159942626953,
+      "learning_rate": 9.691943127962085e-07,
+      "loss": 0.6558,
      "step": 60
    },
    {
      "epoch": 0.9962825278810409,
-      "eval_f1": 0.
-      "eval_loss": 0.
-      "eval_runtime": 41.
-      "eval_samples_per_second": 51.
-      "eval_steps_per_second": 1.
+      "eval_f1": 0.624883504193849,
+      "eval_loss": 0.654695987701416,
+      "eval_runtime": 41.9567,
+      "eval_samples_per_second": 51.148,
+      "eval_steps_per_second": 1.621,
      "step": 67
    },
    {
      "epoch": 1.0408921933085502,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.4484333992004395,
+      "learning_rate": 9.454976303317536e-07,
+      "loss": 0.6682,
      "step": 70
    },
    {
      "epoch": 1.1895910780669146,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.100910663604736,
+      "learning_rate": 9.218009478672986e-07,
+      "loss": 0.651,
      "step": 80
    },
    {
      "epoch": 1.3382899628252787,
-      "grad_norm": 4.
-      "learning_rate": 8.
-      "loss": 0.
+      "grad_norm": 4.338140964508057,
+      "learning_rate": 8.981042654028435e-07,
+      "loss": 0.6381,
      "step": 90
    },
    {
      "epoch": 1.486988847583643,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.433474540710449,
+      "learning_rate": 8.744075829383885e-07,
+      "loss": 0.6617,
      "step": 100
    },
    {
      "epoch": 1.6356877323420074,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.912813186645508,
+      "learning_rate": 8.507109004739336e-07,
+      "loss": 0.6622,
      "step": 110
    },
    {
      "epoch": 1.7843866171003717,
-      "grad_norm": 4.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.420755863189697,
+      "learning_rate": 8.270142180094787e-07,
+      "loss": 0.6466,
      "step": 120
    },
    {
      "epoch": 1.933085501858736,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.3080854415893555,
+      "learning_rate": 8.033175355450236e-07,
+      "loss": 0.6504,
      "step": 130
    },
    {
      "epoch": 1.9925650557620818,
-      "eval_f1": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second": 51.
-      "eval_steps_per_second": 1.
+      "eval_f1": 0.6290773532152842,
+      "eval_loss": 0.6480793356895447,
+      "eval_runtime": 42.0039,
+      "eval_samples_per_second": 51.09,
+      "eval_steps_per_second": 1.619,
      "step": 134
    },
    {
      "epoch": 2.0817843866171004,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.932919979095459,
+      "learning_rate": 7.796208530805687e-07,
+      "loss": 0.6428,
      "step": 140
    },
    {
      "epoch": 2.2304832713754648,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.613367795944214,
+      "learning_rate": 7.559241706161137e-07,
+      "loss": 0.6428,
      "step": 150
    },
    {
      "epoch": 2.379182156133829,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.385443687438965,
+      "learning_rate": 7.322274881516587e-07,
+      "loss": 0.6494,
      "step": 160
    },
    {
      "epoch": 2.5278810408921935,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 6.523526668548584,
+      "learning_rate": 7.085308056872038e-07,
+      "loss": 0.646,
      "step": 170
    },
    {
      "epoch": 2.6765799256505574,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.9948554039001465,
+      "learning_rate": 6.848341232227488e-07,
+      "loss": 0.6508,
      "step": 180
    },
    {
      "epoch": 2.825278810408922,
-      "grad_norm": 3.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.637260913848877,
+      "learning_rate": 6.611374407582938e-07,
+      "loss": 0.6417,
      "step": 190
    },
    {
      "epoch": 2.973977695167286,
-      "grad_norm": 3.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.3876774311065674,
+      "learning_rate": 6.374407582938388e-07,
+      "loss": 0.6595,
      "step": 200
    },
    {
      "epoch": 2.9888475836431225,
-      "eval_f1": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second": 51.
-      "eval_steps_per_second": 1.
+      "eval_f1": 0.6342031686859273,
+      "eval_loss": 0.6434848308563232,
+      "eval_runtime": 42.0287,
+      "eval_samples_per_second": 51.06,
+      "eval_steps_per_second": 1.618,
      "step": 201
    },
    {
      "epoch": 3.1226765799256504,
-      "grad_norm": 4.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.729325294494629,
+      "learning_rate": 6.137440758293838e-07,
+      "loss": 0.6284,
      "step": 210
    },
    {
      "epoch": 3.2713754646840147,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.6306986808776855,
+      "learning_rate": 5.900473933649289e-07,
+      "loss": 0.6393,
      "step": 220
    },
    {
      "epoch": 3.420074349442379,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.591604232788086,
+      "learning_rate": 5.66350710900474e-07,
+      "loss": 0.6478,
      "step": 230
    },
    {
      "epoch": 3.5687732342007434,
-      "grad_norm": 4.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.6553473472595215,
+      "learning_rate": 5.426540284360189e-07,
+      "loss": 0.6233,
      "step": 240
    },
    {
      "epoch": 3.717472118959108,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.999525547027588,
+      "learning_rate": 5.189573459715639e-07,
+      "loss": 0.6454,
      "step": 250
    },
    {
      "epoch": 3.866171003717472,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.377701759338379,
+      "learning_rate": 4.95260663507109e-07,
+      "loss": 0.6662,
      "step": 260
    },
    {
      "epoch": 4.0,
-      "eval_f1": 0.
-      "eval_loss": 0.
-      "eval_runtime": 41.
-      "eval_samples_per_second": 51.
-      "eval_steps_per_second": 1.
+      "eval_f1": 0.6356011183597391,
+      "eval_loss": 0.6397082805633545,
+      "eval_runtime": 41.9159,
+      "eval_samples_per_second": 51.198,
+      "eval_steps_per_second": 1.622,
      "step": 269
    },
    {
      "epoch": 4.014869888475836,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.2719950675964355,
+      "learning_rate": 4.71563981042654e-07,
+      "loss": 0.6457,
      "step": 270
    },
    {
      "epoch": 4.163568773234201,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.870419025421143,
+      "learning_rate": 4.4786729857819903e-07,
+      "loss": 0.6526,
      "step": 280
    },
    {
      "epoch": 4.312267657992565,
-      "grad_norm": 3.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.5545692443847656,
+      "learning_rate": 4.2417061611374406e-07,
+      "loss": 0.6337,
      "step": 290
    },
    {
      "epoch": 4.4609665427509295,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 4.513426780700684,
+      "learning_rate": 4.004739336492891e-07,
+      "loss": 0.6386,
      "step": 300
    },
    {
      "epoch": 4.609665427509293,
-      "grad_norm": 3.
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 3.423133611679077,
+      "learning_rate": 3.7677725118483413e-07,
+      "loss": 0.633,
      "step": 310
    },
    {
      "epoch": 4.758364312267658,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 2.879420042037964,
+      "learning_rate": 3.530805687203791e-07,
+      "loss": 0.6626,
      "step": 320
    },
    {
      "epoch": 4.907063197026022,
-      "grad_norm":
-      "learning_rate":
-      "loss": 0.
+      "grad_norm": 7.331841468811035,
+      "learning_rate": 3.293838862559242e-07,
+      "loss": 0.6409,
      "step": 330
    },
    {
-      "epoch": 4.
-      "eval_f1": 0.
-      "eval_loss": 0.
-      "eval_runtime":
-      "eval_samples_per_second":
-      "eval_steps_per_second": 1.
-      "step":
+      "epoch": 4.996282527881041,
+      "eval_f1": 0.6374650512581547,
+      "eval_loss": 0.6374994516372681,
+      "eval_runtime": 41.9902,
+      "eval_samples_per_second": 51.107,
+      "eval_steps_per_second": 1.619,
+      "step": 336
+    },
+    {
+      "epoch": 5.055762081784387,
+      "grad_norm": 4.733645915985107,
+      "learning_rate": 3.0568720379146917e-07,
+      "loss": 0.6435,
+      "step": 340
+    },
+    {
+      "epoch": 5.204460966542751,
+      "grad_norm": 4.443199634552002,
+      "learning_rate": 2.819905213270142e-07,
+      "loss": 0.6461,
+      "step": 350
+    },
+    {
+      "epoch": 5.353159851301116,
+      "grad_norm": 3.4727444648742676,
+      "learning_rate": 2.5829383886255924e-07,
+      "loss": 0.6459,
+      "step": 360
+    },
+    {
+      "epoch": 5.5018587360594795,
+      "grad_norm": 5.56289529800415,
+      "learning_rate": 2.3459715639810427e-07,
+      "loss": 0.6447,
+      "step": 370
+    },
+    {
+      "epoch": 5.650557620817844,
+      "grad_norm": 4.653774738311768,
+      "learning_rate": 2.109004739336493e-07,
+      "loss": 0.6512,
+      "step": 380
+    },
+    {
+      "epoch": 5.799256505576208,
+      "grad_norm": 3.595479726791382,
+      "learning_rate": 1.8720379146919428e-07,
+      "loss": 0.6433,
+      "step": 390
+    },
+    {
+      "epoch": 5.947955390334572,
+      "grad_norm": 3.631420135498047,
+      "learning_rate": 1.6350710900473932e-07,
+      "loss": 0.637,
+      "step": 400
+    },
+    {
+      "epoch": 5.992565055762082,
+      "eval_f1": 0.6369990680335508,
+      "eval_loss": 0.6362724304199219,
+      "eval_runtime": 41.9377,
+      "eval_samples_per_second": 51.171,
+      "eval_steps_per_second": 1.621,
+      "step": 403
+    },
+    {
+      "epoch": 6.096654275092937,
+      "grad_norm": 5.001819133758545,
+      "learning_rate": 1.3981042654028435e-07,
+      "loss": 0.6312,
+      "step": 410
+    },
+    {
+      "epoch": 6.245353159851301,
+      "grad_norm": 4.287126064300537,
+      "learning_rate": 1.1611374407582938e-07,
+      "loss": 0.6277,
+      "step": 420
+    },
+    {
+      "epoch": 6.394052044609666,
+      "grad_norm": 3.234271764755249,
+      "learning_rate": 9.24170616113744e-08,
+      "loss": 0.6272,
+      "step": 430
+    },
+    {
+      "epoch": 6.5427509293680295,
+      "grad_norm": 7.107583045959473,
+      "learning_rate": 6.872037914691943e-08,
+      "loss": 0.6462,
+      "step": 440
+    },
+    {
+      "epoch": 6.691449814126394,
+      "grad_norm": 4.110471725463867,
+      "learning_rate": 4.5023696682464454e-08,
+      "loss": 0.6399,
+      "step": 450
+    },
+    {
+      "epoch": 6.840148698884758,
+      "grad_norm": 5.2897186279296875,
+      "learning_rate": 2.132701421800948e-08,
+      "loss": 0.6442,
+      "step": 460
+    },
+    {
+      "epoch": 6.973977695167286,
+      "eval_f1": 0.6369990680335508,
+      "eval_loss": 0.6359897255897522,
+      "eval_runtime": 42.3917,
+      "eval_samples_per_second": 50.623,
+      "eval_steps_per_second": 1.604,
+      "step": 469
    },
    {
-      "epoch":
-      "step":
-      "total_flos":
-      "train_loss": 0.
-      "train_runtime":
-      "train_samples_per_second": 27.
+      "epoch": 6.973977695167286,
+      "step": 469,
+      "total_flos": 4.691018488784044e+18,
+      "train_loss": 0.6468103404746635,
+      "train_runtime": 2213.3268,
+      "train_samples_per_second": 27.142,
      "train_steps_per_second": 0.212
    }
  ],
  "logging_steps": 10,
-  "max_steps":
+  "max_steps": 469,
  "num_input_tokens_seen": 0,
-  "num_train_epochs":
+  "num_train_epochs": 7,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
@@ -311,7 +420,7 @@
      "attributes": {}
    }
  },
-  "total_flos":
+  "total_flos": 4.691018488784044e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
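The updated trainer_state.json covers the full seven-epoch run (469 optimizer steps): log_history records loss, grad_norm, and learning_rate every 10 steps plus one eval_f1/eval_loss entry per epoch, and the best checkpoint is now checkpoint-336 with an eval_f1 of about 0.6375. A standard-library-only sketch of pulling the evaluation curve out of this file (the path is illustrative):

import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Evaluation entries are the log_history items that carry an eval_f1 key.
evals = [entry for entry in state["log_history"] if "eval_f1" in entry]
for entry in evals:
    print(f'step {entry["step"]:>3}  epoch {entry["epoch"]:.2f}  eval_f1 {entry["eval_f1"]:.4f}')

print("best:", state["best_metric"], "from", state["best_model_checkpoint"])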
training_args.bin CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:65fce2aea49d7d2f597088fe48421f8e687f6ec46b1fbee8d85fc5e4758e0ed1
size 5304
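training_args.bin is the pickled TrainingArguments object that the Trainer saves next to each checkpoint; only its LFS digest changes in this commit. A sketch of inspecting it locally, assuming torch and transformers are installed (unpickling runs arbitrary code, so only load files from a source you trust; weights_only=False is needed on recent torch versions for non-tensor pickles):

import torch

# Load the pickled TrainingArguments and print a few of its hyperparameters.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print(args.per_device_train_batch_size, args.learning_rate, args.num_train_epochs)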