JaydenJH commited on
Commit
8a0fe8f
1 Parent(s): 18626b4

Training in progress, epoch 1

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.977645305514158,
3
+ "eval_accuracy": 0.7969494756911344,
4
+ "eval_loss": 0.5111686587333679,
5
+ "eval_runtime": 6.0944,
6
+ "eval_samples_per_second": 172.126,
7
+ "eval_steps_per_second": 21.659,
8
+ "total_flos": 8.688451575191962e+17,
9
+ "train_loss": 0.5594642650581405,
10
+ "train_runtime": 488.8414,
11
+ "train_samples_per_second": 54.864,
12
+ "train_steps_per_second": 1.708
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.977645305514158,
3
+ "eval_accuracy": 0.7969494756911344,
4
+ "eval_loss": 0.5111686587333679,
5
+ "eval_runtime": 6.0944,
6
+ "eval_samples_per_second": 172.126,
7
+ "eval_steps_per_second": 21.659
8
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4ebfec84dd05af3c9fc89b09f055580171d2c1f28eee92dbf339302f8b486bb
3
  size 110356296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70d91f382c98700925bf5e001fc53f6a9cd8303be232da5879b68b6d4afb7fc4
3
  size 110356296
runs/May17_14-13-40_DESKTOP-2L4O0E9/events.out.tfevents.1715923373.DESKTOP-2L4O0E9.19356.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f50039fd5ca1dee50631ae83d76b9c6bcf65b447d89905b1a9c36cbc57cb05a5
3
+ size 411
runs/May17_14-25-43_DESKTOP-2L4O0E9/events.out.tfevents.1715923552.DESKTOP-2L4O0E9.19356.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e8cb88b4c713577c8f2173079446f3a5156dd2d5b4991af496c057b34c9a01f
3
+ size 12953
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.977645305514158,
3
+ "total_flos": 8.688451575191962e+17,
4
+ "train_loss": 0.5594642650581405,
5
+ "train_runtime": 488.8414,
6
+ "train_samples_per_second": 54.864,
7
+ "train_steps_per_second": 1.708
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,656 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7969494756911344,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finetuned-eurosat\\checkpoint-835",
4
+ "epoch": 4.977645305514158,
5
+ "eval_steps": 500,
6
+ "global_step": 835,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05961251862891207,
13
+ "grad_norm": 8.29847240447998,
14
+ "learning_rate": 5.9523809523809525e-06,
15
+ "loss": 0.7654,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.11922503725782414,
20
+ "grad_norm": 10.261287689208984,
21
+ "learning_rate": 1.1904761904761905e-05,
22
+ "loss": 0.6664,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.17883755588673622,
27
+ "grad_norm": 7.24470329284668,
28
+ "learning_rate": 1.785714285714286e-05,
29
+ "loss": 0.6438,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.23845007451564829,
34
+ "grad_norm": 11.913352012634277,
35
+ "learning_rate": 2.380952380952381e-05,
36
+ "loss": 0.7519,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.29806259314456035,
41
+ "grad_norm": 7.875146389007568,
42
+ "learning_rate": 2.9761904761904762e-05,
43
+ "loss": 0.596,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.35767511177347244,
48
+ "grad_norm": 7.428829193115234,
49
+ "learning_rate": 3.571428571428572e-05,
50
+ "loss": 0.5545,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.4172876304023845,
55
+ "grad_norm": 14.340974807739258,
56
+ "learning_rate": 4.166666666666667e-05,
57
+ "loss": 0.5673,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.47690014903129657,
62
+ "grad_norm": 9.428411483764648,
63
+ "learning_rate": 4.761904761904762e-05,
64
+ "loss": 0.5114,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.5365126676602087,
69
+ "grad_norm": 13.027579307556152,
70
+ "learning_rate": 4.9600532623169113e-05,
71
+ "loss": 0.4848,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.5961251862891207,
76
+ "grad_norm": 13.216593742370605,
77
+ "learning_rate": 4.893475366178429e-05,
78
+ "loss": 0.5013,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.6557377049180327,
83
+ "grad_norm": 10.671051979064941,
84
+ "learning_rate": 4.826897470039947e-05,
85
+ "loss": 0.5354,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.7153502235469449,
90
+ "grad_norm": 9.414480209350586,
91
+ "learning_rate": 4.760319573901465e-05,
92
+ "loss": 0.526,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.7749627421758569,
97
+ "grad_norm": 11.687104225158691,
98
+ "learning_rate": 4.6937416777629825e-05,
99
+ "loss": 0.5206,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.834575260804769,
104
+ "grad_norm": 9.784751892089844,
105
+ "learning_rate": 4.6271637816245015e-05,
106
+ "loss": 0.4564,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.8941877794336811,
111
+ "grad_norm": 17.176158905029297,
112
+ "learning_rate": 4.560585885486019e-05,
113
+ "loss": 0.6716,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.9538002980625931,
118
+ "grad_norm": 15.241743087768555,
119
+ "learning_rate": 4.494007989347537e-05,
120
+ "loss": 0.6594,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.9955290611028316,
125
+ "eval_accuracy": 0.7426120114394662,
126
+ "eval_loss": 0.6859908103942871,
127
+ "eval_runtime": 8.046,
128
+ "eval_samples_per_second": 130.375,
129
+ "eval_steps_per_second": 16.406,
130
+ "step": 167
131
+ },
132
+ {
133
+ "epoch": 1.0134128166915053,
134
+ "grad_norm": 10.333115577697754,
135
+ "learning_rate": 4.427430093209054e-05,
136
+ "loss": 0.7604,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 1.0730253353204173,
141
+ "grad_norm": 7.111759185791016,
142
+ "learning_rate": 4.3608521970705726e-05,
143
+ "loss": 0.6518,
144
+ "step": 180
145
+ },
146
+ {
147
+ "epoch": 1.1326378539493294,
148
+ "grad_norm": 11.78957462310791,
149
+ "learning_rate": 4.294274300932091e-05,
150
+ "loss": 0.7004,
151
+ "step": 190
152
+ },
153
+ {
154
+ "epoch": 1.1922503725782414,
155
+ "grad_norm": 8.77928352355957,
156
+ "learning_rate": 4.2276964047936085e-05,
157
+ "loss": 0.706,
158
+ "step": 200
159
+ },
160
+ {
161
+ "epoch": 1.2518628912071534,
162
+ "grad_norm": 8.210531234741211,
163
+ "learning_rate": 4.161118508655127e-05,
164
+ "loss": 0.624,
165
+ "step": 210
166
+ },
167
+ {
168
+ "epoch": 1.3114754098360657,
169
+ "grad_norm": 10.165024757385254,
170
+ "learning_rate": 4.0945406125166444e-05,
171
+ "loss": 0.6461,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 1.3710879284649775,
176
+ "grad_norm": 10.348404884338379,
177
+ "learning_rate": 4.027962716378163e-05,
178
+ "loss": 0.6481,
179
+ "step": 230
180
+ },
181
+ {
182
+ "epoch": 1.4307004470938898,
183
+ "grad_norm": 11.13882064819336,
184
+ "learning_rate": 3.961384820239681e-05,
185
+ "loss": 0.7198,
186
+ "step": 240
187
+ },
188
+ {
189
+ "epoch": 1.4903129657228018,
190
+ "grad_norm": 11.62390422821045,
191
+ "learning_rate": 3.8948069241011986e-05,
192
+ "loss": 0.6704,
193
+ "step": 250
194
+ },
195
+ {
196
+ "epoch": 1.5499254843517138,
197
+ "grad_norm": 10.641077041625977,
198
+ "learning_rate": 3.828229027962716e-05,
199
+ "loss": 0.6071,
200
+ "step": 260
201
+ },
202
+ {
203
+ "epoch": 1.6095380029806259,
204
+ "grad_norm": 9.909879684448242,
205
+ "learning_rate": 3.7616511318242345e-05,
206
+ "loss": 0.6476,
207
+ "step": 270
208
+ },
209
+ {
210
+ "epoch": 1.669150521609538,
211
+ "grad_norm": 9.03955078125,
212
+ "learning_rate": 3.695073235685753e-05,
213
+ "loss": 0.6667,
214
+ "step": 280
215
+ },
216
+ {
217
+ "epoch": 1.7287630402384502,
218
+ "grad_norm": 8.11616325378418,
219
+ "learning_rate": 3.6284953395472704e-05,
220
+ "loss": 0.6774,
221
+ "step": 290
222
+ },
223
+ {
224
+ "epoch": 1.788375558867362,
225
+ "grad_norm": 8.527605056762695,
226
+ "learning_rate": 3.561917443408788e-05,
227
+ "loss": 0.5825,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 1.8479880774962743,
232
+ "grad_norm": 9.406977653503418,
233
+ "learning_rate": 3.495339547270306e-05,
234
+ "loss": 0.6245,
235
+ "step": 310
236
+ },
237
+ {
238
+ "epoch": 1.9076005961251863,
239
+ "grad_norm": 8.017857551574707,
240
+ "learning_rate": 3.4287616511318246e-05,
241
+ "loss": 0.5938,
242
+ "step": 320
243
+ },
244
+ {
245
+ "epoch": 1.9672131147540983,
246
+ "grad_norm": 7.340479373931885,
247
+ "learning_rate": 3.362183754993342e-05,
248
+ "loss": 0.5427,
249
+ "step": 330
250
+ },
251
+ {
252
+ "epoch": 1.9970193740685542,
253
+ "eval_accuracy": 0.7836034318398475,
254
+ "eval_loss": 0.5231460332870483,
255
+ "eval_runtime": 7.8663,
256
+ "eval_samples_per_second": 133.354,
257
+ "eval_steps_per_second": 16.781,
258
+ "step": 335
259
+ },
260
+ {
261
+ "epoch": 2.0268256333830106,
262
+ "grad_norm": 7.837192535400391,
263
+ "learning_rate": 3.2956058588548605e-05,
264
+ "loss": 0.5391,
265
+ "step": 340
266
+ },
267
+ {
268
+ "epoch": 2.0864381520119224,
269
+ "grad_norm": 9.751850128173828,
270
+ "learning_rate": 3.229027962716378e-05,
271
+ "loss": 0.6554,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 2.1460506706408347,
276
+ "grad_norm": 14.7183198928833,
277
+ "learning_rate": 3.1624500665778964e-05,
278
+ "loss": 0.6089,
279
+ "step": 360
280
+ },
281
+ {
282
+ "epoch": 2.2056631892697465,
283
+ "grad_norm": 8.721835136413574,
284
+ "learning_rate": 3.095872170439415e-05,
285
+ "loss": 0.6026,
286
+ "step": 370
287
+ },
288
+ {
289
+ "epoch": 2.2652757078986587,
290
+ "grad_norm": 11.642998695373535,
291
+ "learning_rate": 3.0292942743009323e-05,
292
+ "loss": 0.594,
293
+ "step": 380
294
+ },
295
+ {
296
+ "epoch": 2.3248882265275705,
297
+ "grad_norm": 13.0570650100708,
298
+ "learning_rate": 2.96271637816245e-05,
299
+ "loss": 0.5782,
300
+ "step": 390
301
+ },
302
+ {
303
+ "epoch": 2.384500745156483,
304
+ "grad_norm": 11.823034286499023,
305
+ "learning_rate": 2.8961384820239686e-05,
306
+ "loss": 0.4769,
307
+ "step": 400
308
+ },
309
+ {
310
+ "epoch": 2.444113263785395,
311
+ "grad_norm": 11.977195739746094,
312
+ "learning_rate": 2.8295605858854862e-05,
313
+ "loss": 0.545,
314
+ "step": 410
315
+ },
316
+ {
317
+ "epoch": 2.503725782414307,
318
+ "grad_norm": 8.980050086975098,
319
+ "learning_rate": 2.762982689747004e-05,
320
+ "loss": 0.5146,
321
+ "step": 420
322
+ },
323
+ {
324
+ "epoch": 2.563338301043219,
325
+ "grad_norm": 10.377087593078613,
326
+ "learning_rate": 2.6964047936085217e-05,
327
+ "loss": 0.5746,
328
+ "step": 430
329
+ },
330
+ {
331
+ "epoch": 2.6229508196721314,
332
+ "grad_norm": 9.334879875183105,
333
+ "learning_rate": 2.62982689747004e-05,
334
+ "loss": 0.4936,
335
+ "step": 440
336
+ },
337
+ {
338
+ "epoch": 2.682563338301043,
339
+ "grad_norm": 9.12111759185791,
340
+ "learning_rate": 2.563249001331558e-05,
341
+ "loss": 0.5245,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 2.742175856929955,
346
+ "grad_norm": 8.085973739624023,
347
+ "learning_rate": 2.496671105193076e-05,
348
+ "loss": 0.5766,
349
+ "step": 460
350
+ },
351
+ {
352
+ "epoch": 2.8017883755588673,
353
+ "grad_norm": 8.401688575744629,
354
+ "learning_rate": 2.430093209054594e-05,
355
+ "loss": 0.5464,
356
+ "step": 470
357
+ },
358
+ {
359
+ "epoch": 2.8614008941877795,
360
+ "grad_norm": 8.728181838989258,
361
+ "learning_rate": 2.363515312916112e-05,
362
+ "loss": 0.5163,
363
+ "step": 480
364
+ },
365
+ {
366
+ "epoch": 2.9210134128166914,
367
+ "grad_norm": 7.002892017364502,
368
+ "learning_rate": 2.29693741677763e-05,
369
+ "loss": 0.5183,
370
+ "step": 490
371
+ },
372
+ {
373
+ "epoch": 2.9806259314456036,
374
+ "grad_norm": 12.395273208618164,
375
+ "learning_rate": 2.2303595206391477e-05,
376
+ "loss": 0.523,
377
+ "step": 500
378
+ },
379
+ {
380
+ "epoch": 2.9985096870342773,
381
+ "eval_accuracy": 0.7912297426120114,
382
+ "eval_loss": 0.5245903730392456,
383
+ "eval_runtime": 8.0135,
384
+ "eval_samples_per_second": 130.904,
385
+ "eval_steps_per_second": 16.472,
386
+ "step": 503
387
+ },
388
+ {
389
+ "epoch": 3.0402384500745154,
390
+ "grad_norm": 7.87151575088501,
391
+ "learning_rate": 2.163781624500666e-05,
392
+ "loss": 0.5345,
393
+ "step": 510
394
+ },
395
+ {
396
+ "epoch": 3.0998509687034277,
397
+ "grad_norm": 10.171830177307129,
398
+ "learning_rate": 2.097203728362184e-05,
399
+ "loss": 0.4694,
400
+ "step": 520
401
+ },
402
+ {
403
+ "epoch": 3.15946348733234,
404
+ "grad_norm": 9.175612449645996,
405
+ "learning_rate": 2.030625832223702e-05,
406
+ "loss": 0.5573,
407
+ "step": 530
408
+ },
409
+ {
410
+ "epoch": 3.2190760059612518,
411
+ "grad_norm": 7.954110145568848,
412
+ "learning_rate": 1.96404793608522e-05,
413
+ "loss": 0.5256,
414
+ "step": 540
415
+ },
416
+ {
417
+ "epoch": 3.278688524590164,
418
+ "grad_norm": 10.04851245880127,
419
+ "learning_rate": 1.8974700399467375e-05,
420
+ "loss": 0.5096,
421
+ "step": 550
422
+ },
423
+ {
424
+ "epoch": 3.338301043219076,
425
+ "grad_norm": 6.532379150390625,
426
+ "learning_rate": 1.8308921438082558e-05,
427
+ "loss": 0.4694,
428
+ "step": 560
429
+ },
430
+ {
431
+ "epoch": 3.397913561847988,
432
+ "grad_norm": 10.408387184143066,
433
+ "learning_rate": 1.7643142476697737e-05,
434
+ "loss": 0.5597,
435
+ "step": 570
436
+ },
437
+ {
438
+ "epoch": 3.4575260804769004,
439
+ "grad_norm": 8.469670295715332,
440
+ "learning_rate": 1.6977363515312917e-05,
441
+ "loss": 0.5225,
442
+ "step": 580
443
+ },
444
+ {
445
+ "epoch": 3.517138599105812,
446
+ "grad_norm": 11.036748886108398,
447
+ "learning_rate": 1.6311584553928097e-05,
448
+ "loss": 0.53,
449
+ "step": 590
450
+ },
451
+ {
452
+ "epoch": 3.5767511177347244,
453
+ "grad_norm": 8.11014461517334,
454
+ "learning_rate": 1.5645805592543276e-05,
455
+ "loss": 0.4681,
456
+ "step": 600
457
+ },
458
+ {
459
+ "epoch": 3.6363636363636362,
460
+ "grad_norm": 11.87654972076416,
461
+ "learning_rate": 1.4980026631158456e-05,
462
+ "loss": 0.5149,
463
+ "step": 610
464
+ },
465
+ {
466
+ "epoch": 3.6959761549925485,
467
+ "grad_norm": 10.47935962677002,
468
+ "learning_rate": 1.4314247669773637e-05,
469
+ "loss": 0.5159,
470
+ "step": 620
471
+ },
472
+ {
473
+ "epoch": 3.7555886736214603,
474
+ "grad_norm": 11.331987380981445,
475
+ "learning_rate": 1.3648468708388815e-05,
476
+ "loss": 0.4991,
477
+ "step": 630
478
+ },
479
+ {
480
+ "epoch": 3.8152011922503726,
481
+ "grad_norm": 9.343537330627441,
482
+ "learning_rate": 1.2982689747003996e-05,
483
+ "loss": 0.5833,
484
+ "step": 640
485
+ },
486
+ {
487
+ "epoch": 3.874813710879285,
488
+ "grad_norm": 7.791767120361328,
489
+ "learning_rate": 1.2316910785619175e-05,
490
+ "loss": 0.4634,
491
+ "step": 650
492
+ },
493
+ {
494
+ "epoch": 3.9344262295081966,
495
+ "grad_norm": 10.829383850097656,
496
+ "learning_rate": 1.1651131824234355e-05,
497
+ "loss": 0.5354,
498
+ "step": 660
499
+ },
500
+ {
501
+ "epoch": 3.994038748137109,
502
+ "grad_norm": 12.177925109863281,
503
+ "learning_rate": 1.0985352862849534e-05,
504
+ "loss": 0.4991,
505
+ "step": 670
506
+ },
507
+ {
508
+ "epoch": 4.0,
509
+ "eval_accuracy": 0.7950428979980935,
510
+ "eval_loss": 0.5136051774024963,
511
+ "eval_runtime": 8.0237,
512
+ "eval_samples_per_second": 130.737,
513
+ "eval_steps_per_second": 16.451,
514
+ "step": 671
515
+ },
516
+ {
517
+ "epoch": 4.053651266766021,
518
+ "grad_norm": 10.48951244354248,
519
+ "learning_rate": 1.0319573901464714e-05,
520
+ "loss": 0.5024,
521
+ "step": 680
522
+ },
523
+ {
524
+ "epoch": 4.113263785394933,
525
+ "grad_norm": 5.629518032073975,
526
+ "learning_rate": 9.653794940079893e-06,
527
+ "loss": 0.4227,
528
+ "step": 690
529
+ },
530
+ {
531
+ "epoch": 4.172876304023845,
532
+ "grad_norm": 7.002760410308838,
533
+ "learning_rate": 8.988015978695073e-06,
534
+ "loss": 0.5151,
535
+ "step": 700
536
+ },
537
+ {
538
+ "epoch": 4.2324888226527575,
539
+ "grad_norm": 12.157367706298828,
540
+ "learning_rate": 8.322237017310254e-06,
541
+ "loss": 0.5052,
542
+ "step": 710
543
+ },
544
+ {
545
+ "epoch": 4.292101341281669,
546
+ "grad_norm": 10.323151588439941,
547
+ "learning_rate": 7.656458055925434e-06,
548
+ "loss": 0.5357,
549
+ "step": 720
550
+ },
551
+ {
552
+ "epoch": 4.351713859910581,
553
+ "grad_norm": 6.0294694900512695,
554
+ "learning_rate": 6.9906790945406124e-06,
555
+ "loss": 0.4705,
556
+ "step": 730
557
+ },
558
+ {
559
+ "epoch": 4.411326378539493,
560
+ "grad_norm": 9.291495323181152,
561
+ "learning_rate": 6.324900133155792e-06,
562
+ "loss": 0.5533,
563
+ "step": 740
564
+ },
565
+ {
566
+ "epoch": 4.470938897168406,
567
+ "grad_norm": 13.062582015991211,
568
+ "learning_rate": 5.659121171770972e-06,
569
+ "loss": 0.4519,
570
+ "step": 750
571
+ },
572
+ {
573
+ "epoch": 4.5305514157973175,
574
+ "grad_norm": 9.342095375061035,
575
+ "learning_rate": 4.993342210386152e-06,
576
+ "loss": 0.5205,
577
+ "step": 760
578
+ },
579
+ {
580
+ "epoch": 4.590163934426229,
581
+ "grad_norm": 8.174445152282715,
582
+ "learning_rate": 4.327563249001331e-06,
583
+ "loss": 0.4897,
584
+ "step": 770
585
+ },
586
+ {
587
+ "epoch": 4.649776453055141,
588
+ "grad_norm": 6.821183681488037,
589
+ "learning_rate": 3.6617842876165113e-06,
590
+ "loss": 0.4987,
591
+ "step": 780
592
+ },
593
+ {
594
+ "epoch": 4.709388971684054,
595
+ "grad_norm": 8.713502883911133,
596
+ "learning_rate": 2.9960053262316913e-06,
597
+ "loss": 0.4377,
598
+ "step": 790
599
+ },
600
+ {
601
+ "epoch": 4.769001490312966,
602
+ "grad_norm": 11.140336990356445,
603
+ "learning_rate": 2.3302263648468712e-06,
604
+ "loss": 0.4938,
605
+ "step": 800
606
+ },
607
+ {
608
+ "epoch": 4.828614008941877,
609
+ "grad_norm": 12.790794372558594,
610
+ "learning_rate": 1.6644474034620508e-06,
611
+ "loss": 0.4692,
612
+ "step": 810
613
+ },
614
+ {
615
+ "epoch": 4.88822652757079,
616
+ "grad_norm": 10.664670944213867,
617
+ "learning_rate": 9.986684420772303e-07,
618
+ "loss": 0.5565,
619
+ "step": 820
620
+ },
621
+ {
622
+ "epoch": 4.947839046199702,
623
+ "grad_norm": 8.757057189941406,
624
+ "learning_rate": 3.3288948069241013e-07,
625
+ "loss": 0.4512,
626
+ "step": 830
627
+ },
628
+ {
629
+ "epoch": 4.977645305514158,
630
+ "eval_accuracy": 0.7969494756911344,
631
+ "eval_loss": 0.5111686587333679,
632
+ "eval_runtime": 5.8446,
633
+ "eval_samples_per_second": 179.481,
634
+ "eval_steps_per_second": 22.585,
635
+ "step": 835
636
+ },
637
+ {
638
+ "epoch": 4.977645305514158,
639
+ "step": 835,
640
+ "total_flos": 8.688451575191962e+17,
641
+ "train_loss": 0.5594642650581405,
642
+ "train_runtime": 488.8414,
643
+ "train_samples_per_second": 54.864,
644
+ "train_steps_per_second": 1.708
645
+ }
646
+ ],
647
+ "logging_steps": 10,
648
+ "max_steps": 835,
649
+ "num_input_tokens_seen": 0,
650
+ "num_train_epochs": 5,
651
+ "save_steps": 500,
652
+ "total_flos": 8.688451575191962e+17,
653
+ "train_batch_size": 8,
654
+ "trial_name": null,
655
+ "trial_params": null
656
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23934127385b1240a11ce9ec397acfc535e2db21b1ded7abf52befeb6f98652e
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8409723f2a64956b9c2301dba361b2aa4ad1a5be6b039057c73618cb5e423b83
3
  size 5048