matinbaig43 commited on
Commit
cf98616
1 Parent(s): 2159027

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_accuracy": 0.9777131782945736,
4
- "eval_loss": 0.06364382058382034,
5
- "eval_runtime": 43.0533,
6
- "eval_samples_per_second": 95.881,
7
- "eval_steps_per_second": 11.985,
8
  "total_flos": 5.373364651307827e+17,
9
- "train_loss": 0.5084914574327395,
10
- "train_runtime": 598.2279,
11
- "train_samples_per_second": 27.598,
12
- "train_steps_per_second": 0.863
13
  }
 
1
  {
2
  "epoch": 1.0,
 
 
 
 
 
3
  "total_flos": 5.373364651307827e+17,
4
+ "train_loss": 0.5028278545815815,
5
+ "train_runtime": 599.3147,
6
+ "train_samples_per_second": 27.548,
7
+ "train_steps_per_second": 0.861
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bea936062e51133429ae8560fcbacec8c560473ab8cec312e85be757f3a671d
3
  size 110390140
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58cb5a9ab20204b6d5e3f0588d7a9d9d73f1278a4c2387b33ee1b8c2533803d9
3
  size 110390140
runs/Nov18_15-23-22_55f2c01d5672/events.out.tfevents.1731944528.55f2c01d5672.280.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bce8ecf1dcab33476273f53317c59c0f3c6ff3eef9eea7c76469aea2f670047
3
+ size 17330
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5.373364651307827e+17,
4
- "train_loss": 0.5084914574327395,
5
- "train_runtime": 598.2279,
6
- "train_samples_per_second": 27.598,
7
- "train_steps_per_second": 0.863
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 5.373364651307827e+17,
4
+ "train_loss": 0.5028278545815815,
5
+ "train_runtime": 599.3147,
6
+ "train_samples_per_second": 27.548,
7
+ "train_steps_per_second": 0.861
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9777131782945736,
3
  "best_model_checkpoint": "swinv2-tiny-patch4-window16-256-finetuned-plantdisease/checkpoint-516",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,378 +10,378 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.01937984496124031,
13
- "grad_norm": 6.709539890289307,
14
  "learning_rate": 9.615384615384616e-06,
15
- "loss": 2.7078,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.03875968992248062,
20
- "grad_norm": 7.255009651184082,
21
  "learning_rate": 1.923076923076923e-05,
22
- "loss": 2.5761,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.05813953488372093,
27
- "grad_norm": 7.875271320343018,
28
  "learning_rate": 2.8846153846153845e-05,
29
- "loss": 2.331,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.07751937984496124,
34
- "grad_norm": 9.543013572692871,
35
  "learning_rate": 3.846153846153846e-05,
36
- "loss": 2.1674,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.09689922480620156,
41
- "grad_norm": 13.559645652770996,
42
  "learning_rate": 4.8076923076923084e-05,
43
- "loss": 1.7745,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.11627906976744186,
48
- "grad_norm": 16.62691307067871,
49
  "learning_rate": 4.913793103448276e-05,
50
- "loss": 1.3937,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.13565891472868216,
55
- "grad_norm": 15.683610916137695,
56
  "learning_rate": 4.806034482758621e-05,
57
- "loss": 1.0158,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.15503875968992248,
62
- "grad_norm": 14.473851203918457,
63
  "learning_rate": 4.698275862068966e-05,
64
- "loss": 0.8055,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.1744186046511628,
69
- "grad_norm": 14.953446388244629,
70
  "learning_rate": 4.590517241379311e-05,
71
- "loss": 0.6449,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.1937984496124031,
76
- "grad_norm": 11.831011772155762,
77
  "learning_rate": 4.482758620689655e-05,
78
- "loss": 0.6051,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.2131782945736434,
83
- "grad_norm": 13.278793334960938,
84
  "learning_rate": 4.375e-05,
85
- "loss": 0.5566,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.23255813953488372,
90
- "grad_norm": 11.702147483825684,
91
  "learning_rate": 4.267241379310345e-05,
92
- "loss": 0.4845,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.25193798449612403,
97
- "grad_norm": 10.426274299621582,
98
  "learning_rate": 4.1594827586206896e-05,
99
- "loss": 0.3641,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.2713178294573643,
104
- "grad_norm": 17.529104232788086,
105
  "learning_rate": 4.0517241379310344e-05,
106
- "loss": 0.3593,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.29069767441860467,
111
- "grad_norm": 12.532708168029785,
112
  "learning_rate": 3.94396551724138e-05,
113
- "loss": 0.3459,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.31007751937984496,
118
- "grad_norm": 16.44886589050293,
119
  "learning_rate": 3.8362068965517246e-05,
120
- "loss": 0.4475,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.32945736434108525,
125
- "grad_norm": 8.827244758605957,
126
  "learning_rate": 3.7284482758620694e-05,
127
- "loss": 0.3544,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.3488372093023256,
132
- "grad_norm": 6.035519123077393,
133
  "learning_rate": 3.620689655172414e-05,
134
- "loss": 0.3214,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.3682170542635659,
139
- "grad_norm": 7.568902969360352,
140
  "learning_rate": 3.512931034482759e-05,
141
- "loss": 0.2359,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.3875968992248062,
146
- "grad_norm": 13.209157943725586,
147
  "learning_rate": 3.405172413793103e-05,
148
- "loss": 0.34,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.4069767441860465,
153
- "grad_norm": 19.102611541748047,
154
  "learning_rate": 3.297413793103448e-05,
155
- "loss": 0.3697,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.4263565891472868,
160
- "grad_norm": 12.98582649230957,
161
  "learning_rate": 3.1896551724137935e-05,
162
- "loss": 0.2864,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.44573643410852715,
167
- "grad_norm": 14.064764976501465,
168
  "learning_rate": 3.081896551724138e-05,
169
- "loss": 0.2606,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.46511627906976744,
174
- "grad_norm": 13.41550064086914,
175
  "learning_rate": 2.974137931034483e-05,
176
- "loss": 0.2949,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.4844961240310077,
181
- "grad_norm": 7.437551498413086,
182
  "learning_rate": 2.866379310344828e-05,
183
- "loss": 0.1955,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.5038759689922481,
188
- "grad_norm": 8.963953018188477,
189
  "learning_rate": 2.7586206896551727e-05,
190
- "loss": 0.2536,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.5232558139534884,
195
- "grad_norm": 11.374913215637207,
196
  "learning_rate": 2.650862068965517e-05,
197
- "loss": 0.2592,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.5426356589147286,
202
- "grad_norm": 7.7792768478393555,
203
  "learning_rate": 2.543103448275862e-05,
204
- "loss": 0.2024,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.562015503875969,
209
- "grad_norm": 14.774372100830078,
210
  "learning_rate": 2.435344827586207e-05,
211
- "loss": 0.228,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.5813953488372093,
216
- "grad_norm": 8.32029914855957,
217
  "learning_rate": 2.327586206896552e-05,
218
- "loss": 0.1849,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.6007751937984496,
223
- "grad_norm": 14.661565780639648,
224
  "learning_rate": 2.2198275862068967e-05,
225
- "loss": 0.1835,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.6201550387596899,
230
- "grad_norm": 15.804061889648438,
231
  "learning_rate": 2.1120689655172415e-05,
232
- "loss": 0.1828,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.6395348837209303,
237
- "grad_norm": 19.60464859008789,
238
  "learning_rate": 2.0043103448275863e-05,
239
- "loss": 0.1874,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 0.6589147286821705,
244
- "grad_norm": 5.921483516693115,
245
  "learning_rate": 1.896551724137931e-05,
246
- "loss": 0.1741,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.6782945736434108,
251
- "grad_norm": 8.562132835388184,
252
  "learning_rate": 1.7887931034482762e-05,
253
- "loss": 0.2541,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 0.6976744186046512,
258
- "grad_norm": 7.037123680114746,
259
  "learning_rate": 1.6810344827586207e-05,
260
- "loss": 0.2101,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.7170542635658915,
265
- "grad_norm": 4.712380886077881,
266
  "learning_rate": 1.5732758620689655e-05,
267
- "loss": 0.2088,
268
  "step": 370
269
  },
270
  {
271
  "epoch": 0.7364341085271318,
272
- "grad_norm": 5.721762180328369,
273
  "learning_rate": 1.4655172413793103e-05,
274
- "loss": 0.2245,
275
  "step": 380
276
  },
277
  {
278
  "epoch": 0.7558139534883721,
279
- "grad_norm": 8.76972484588623,
280
  "learning_rate": 1.3577586206896553e-05,
281
- "loss": 0.1713,
282
  "step": 390
283
  },
284
  {
285
  "epoch": 0.7751937984496124,
286
- "grad_norm": 8.067720413208008,
287
  "learning_rate": 1.25e-05,
288
- "loss": 0.1822,
289
  "step": 400
290
  },
291
  {
292
  "epoch": 0.7945736434108527,
293
- "grad_norm": 8.3314208984375,
294
  "learning_rate": 1.1422413793103449e-05,
295
- "loss": 0.1781,
296
  "step": 410
297
  },
298
  {
299
  "epoch": 0.813953488372093,
300
- "grad_norm": 22.368515014648438,
301
  "learning_rate": 1.0344827586206897e-05,
302
- "loss": 0.1921,
303
  "step": 420
304
  },
305
  {
306
  "epoch": 0.8333333333333334,
307
- "grad_norm": 11.097426414489746,
308
  "learning_rate": 9.267241379310346e-06,
309
- "loss": 0.1627,
310
  "step": 430
311
  },
312
  {
313
  "epoch": 0.8527131782945736,
314
- "grad_norm": 20.73263168334961,
315
  "learning_rate": 8.189655172413793e-06,
316
- "loss": 0.1884,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 0.872093023255814,
321
- "grad_norm": 2.5283663272857666,
322
  "learning_rate": 7.112068965517242e-06,
323
- "loss": 0.2005,
324
  "step": 450
325
  },
326
  {
327
  "epoch": 0.8914728682170543,
328
- "grad_norm": 4.581237316131592,
329
  "learning_rate": 6.03448275862069e-06,
330
- "loss": 0.1945,
331
  "step": 460
332
  },
333
  {
334
  "epoch": 0.9108527131782945,
335
- "grad_norm": 4.513861656188965,
336
  "learning_rate": 4.9568965517241384e-06,
337
- "loss": 0.1427,
338
  "step": 470
339
  },
340
  {
341
  "epoch": 0.9302325581395349,
342
- "grad_norm": 13.439146041870117,
343
  "learning_rate": 3.8793103448275865e-06,
344
- "loss": 0.1233,
345
  "step": 480
346
  },
347
  {
348
  "epoch": 0.9496124031007752,
349
- "grad_norm": 9.163981437683105,
350
  "learning_rate": 2.8017241379310345e-06,
351
- "loss": 0.1171,
352
  "step": 490
353
  },
354
  {
355
  "epoch": 0.9689922480620154,
356
- "grad_norm": 6.725685119628906,
357
  "learning_rate": 1.724137931034483e-06,
358
- "loss": 0.2016,
359
  "step": 500
360
  },
361
  {
362
  "epoch": 0.9883720930232558,
363
- "grad_norm": 5.155745029449463,
364
  "learning_rate": 6.46551724137931e-07,
365
- "loss": 0.103,
366
  "step": 510
367
  },
368
  {
369
  "epoch": 1.0,
370
- "eval_accuracy": 0.9777131782945736,
371
- "eval_loss": 0.06364382058382034,
372
- "eval_runtime": 43.5867,
373
- "eval_samples_per_second": 94.708,
374
- "eval_steps_per_second": 11.838,
375
  "step": 516
376
  },
377
  {
378
  "epoch": 1.0,
379
  "step": 516,
380
  "total_flos": 5.373364651307827e+17,
381
- "train_loss": 0.5084914574327395,
382
- "train_runtime": 598.2279,
383
- "train_samples_per_second": 27.598,
384
- "train_steps_per_second": 0.863
385
  }
386
  ],
387
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9798934108527132,
3
  "best_model_checkpoint": "swinv2-tiny-patch4-window16-256-finetuned-plantdisease/checkpoint-516",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.01937984496124031,
13
+ "grad_norm": 25.043317794799805,
14
  "learning_rate": 9.615384615384616e-06,
15
+ "loss": 2.7216,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.03875968992248062,
20
+ "grad_norm": 28.33056640625,
21
  "learning_rate": 1.923076923076923e-05,
22
+ "loss": 2.5612,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.05813953488372093,
27
+ "grad_norm": 29.639535903930664,
28
  "learning_rate": 2.8846153846153845e-05,
29
+ "loss": 2.351,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.07751937984496124,
34
+ "grad_norm": 37.78919219970703,
35
  "learning_rate": 3.846153846153846e-05,
36
+ "loss": 2.0721,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.09689922480620156,
41
+ "grad_norm": 43.523765563964844,
42
  "learning_rate": 4.8076923076923084e-05,
43
+ "loss": 1.7632,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.11627906976744186,
48
+ "grad_norm": 57.11870574951172,
49
  "learning_rate": 4.913793103448276e-05,
50
+ "loss": 1.2954,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.13565891472868216,
55
+ "grad_norm": 77.69208526611328,
56
  "learning_rate": 4.806034482758621e-05,
57
+ "loss": 1.0353,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.15503875968992248,
62
+ "grad_norm": 58.72177505493164,
63
  "learning_rate": 4.698275862068966e-05,
64
+ "loss": 0.7396,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.1744186046511628,
69
+ "grad_norm": 45.172569274902344,
70
  "learning_rate": 4.590517241379311e-05,
71
+ "loss": 0.5802,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.1937984496124031,
76
+ "grad_norm": 101.80105590820312,
77
  "learning_rate": 4.482758620689655e-05,
78
+ "loss": 0.6721,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.2131782945736434,
83
+ "grad_norm": 35.118316650390625,
84
  "learning_rate": 4.375e-05,
85
+ "loss": 0.604,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.23255813953488372,
90
+ "grad_norm": 51.14491653442383,
91
  "learning_rate": 4.267241379310345e-05,
92
+ "loss": 0.4788,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.25193798449612403,
97
+ "grad_norm": 55.622344970703125,
98
  "learning_rate": 4.1594827586206896e-05,
99
+ "loss": 0.352,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.2713178294573643,
104
+ "grad_norm": 51.51637649536133,
105
  "learning_rate": 4.0517241379310344e-05,
106
+ "loss": 0.3522,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.29069767441860467,
111
+ "grad_norm": 51.93968963623047,
112
  "learning_rate": 3.94396551724138e-05,
113
+ "loss": 0.3595,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.31007751937984496,
118
+ "grad_norm": 62.32090759277344,
119
  "learning_rate": 3.8362068965517246e-05,
120
+ "loss": 0.4144,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.32945736434108525,
125
+ "grad_norm": 49.8425407409668,
126
  "learning_rate": 3.7284482758620694e-05,
127
+ "loss": 0.3176,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.3488372093023256,
132
+ "grad_norm": 65.97383880615234,
133
  "learning_rate": 3.620689655172414e-05,
134
+ "loss": 0.3074,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.3682170542635659,
139
+ "grad_norm": 41.63103103637695,
140
  "learning_rate": 3.512931034482759e-05,
141
+ "loss": 0.2868,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.3875968992248062,
146
+ "grad_norm": 54.208648681640625,
147
  "learning_rate": 3.405172413793103e-05,
148
+ "loss": 0.3902,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.4069767441860465,
153
+ "grad_norm": 63.819393157958984,
154
  "learning_rate": 3.297413793103448e-05,
155
+ "loss": 0.3249,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.4263565891472868,
160
+ "grad_norm": 40.1036376953125,
161
  "learning_rate": 3.1896551724137935e-05,
162
+ "loss": 0.2938,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.44573643410852715,
167
+ "grad_norm": 43.186614990234375,
168
  "learning_rate": 3.081896551724138e-05,
169
+ "loss": 0.2598,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.46511627906976744,
174
+ "grad_norm": 38.62468719482422,
175
  "learning_rate": 2.974137931034483e-05,
176
+ "loss": 0.2023,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.4844961240310077,
181
+ "grad_norm": 48.45721435546875,
182
  "learning_rate": 2.866379310344828e-05,
183
+ "loss": 0.2323,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.5038759689922481,
188
+ "grad_norm": 41.89044189453125,
189
  "learning_rate": 2.7586206896551727e-05,
190
+ "loss": 0.276,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.5232558139534884,
195
+ "grad_norm": 55.60496520996094,
196
  "learning_rate": 2.650862068965517e-05,
197
+ "loss": 0.2405,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.5426356589147286,
202
+ "grad_norm": 145.46893310546875,
203
  "learning_rate": 2.543103448275862e-05,
204
+ "loss": 0.1902,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.562015503875969,
209
+ "grad_norm": 52.95439147949219,
210
  "learning_rate": 2.435344827586207e-05,
211
+ "loss": 0.173,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.5813953488372093,
216
+ "grad_norm": 45.259090423583984,
217
  "learning_rate": 2.327586206896552e-05,
218
+ "loss": 0.1893,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.6007751937984496,
223
+ "grad_norm": 26.136516571044922,
224
  "learning_rate": 2.2198275862068967e-05,
225
+ "loss": 0.2305,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.6201550387596899,
230
+ "grad_norm": 16.7691593170166,
231
  "learning_rate": 2.1120689655172415e-05,
232
+ "loss": 0.2272,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.6395348837209303,
237
+ "grad_norm": 33.225093841552734,
238
  "learning_rate": 2.0043103448275863e-05,
239
+ "loss": 0.1852,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 0.6589147286821705,
244
+ "grad_norm": 46.34248352050781,
245
  "learning_rate": 1.896551724137931e-05,
246
+ "loss": 0.1849,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.6782945736434108,
251
+ "grad_norm": 39.95323944091797,
252
  "learning_rate": 1.7887931034482762e-05,
253
+ "loss": 0.2435,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 0.6976744186046512,
258
+ "grad_norm": 30.731279373168945,
259
  "learning_rate": 1.6810344827586207e-05,
260
+ "loss": 0.1624,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.7170542635658915,
265
+ "grad_norm": 8.740701675415039,
266
  "learning_rate": 1.5732758620689655e-05,
267
+ "loss": 0.2548,
268
  "step": 370
269
  },
270
  {
271
  "epoch": 0.7364341085271318,
272
+ "grad_norm": 82.22977447509766,
273
  "learning_rate": 1.4655172413793103e-05,
274
+ "loss": 0.2045,
275
  "step": 380
276
  },
277
  {
278
  "epoch": 0.7558139534883721,
279
+ "grad_norm": 63.77909851074219,
280
  "learning_rate": 1.3577586206896553e-05,
281
+ "loss": 0.1882,
282
  "step": 390
283
  },
284
  {
285
  "epoch": 0.7751937984496124,
286
+ "grad_norm": 45.88880157470703,
287
  "learning_rate": 1.25e-05,
288
+ "loss": 0.2073,
289
  "step": 400
290
  },
291
  {
292
  "epoch": 0.7945736434108527,
293
+ "grad_norm": 17.87135887145996,
294
  "learning_rate": 1.1422413793103449e-05,
295
+ "loss": 0.1445,
296
  "step": 410
297
  },
298
  {
299
  "epoch": 0.813953488372093,
300
+ "grad_norm": 43.013282775878906,
301
  "learning_rate": 1.0344827586206897e-05,
302
+ "loss": 0.1474,
303
  "step": 420
304
  },
305
  {
306
  "epoch": 0.8333333333333334,
307
+ "grad_norm": 40.31475067138672,
308
  "learning_rate": 9.267241379310346e-06,
309
+ "loss": 0.2041,
310
  "step": 430
311
  },
312
  {
313
  "epoch": 0.8527131782945736,
314
+ "grad_norm": 49.196441650390625,
315
  "learning_rate": 8.189655172413793e-06,
316
+ "loss": 0.2133,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 0.872093023255814,
321
+ "grad_norm": 20.448143005371094,
322
  "learning_rate": 7.112068965517242e-06,
323
+ "loss": 0.1946,
324
  "step": 450
325
  },
326
  {
327
  "epoch": 0.8914728682170543,
328
+ "grad_norm": 21.7471923828125,
329
  "learning_rate": 6.03448275862069e-06,
330
+ "loss": 0.1013,
331
  "step": 460
332
  },
333
  {
334
  "epoch": 0.9108527131782945,
335
+ "grad_norm": 40.28746795654297,
336
  "learning_rate": 4.9568965517241384e-06,
337
+ "loss": 0.1489,
338
  "step": 470
339
  },
340
  {
341
  "epoch": 0.9302325581395349,
342
+ "grad_norm": 39.44319534301758,
343
  "learning_rate": 3.8793103448275865e-06,
344
+ "loss": 0.1403,
345
  "step": 480
346
  },
347
  {
348
  "epoch": 0.9496124031007752,
349
+ "grad_norm": 38.28104782104492,
350
  "learning_rate": 2.8017241379310345e-06,
351
+ "loss": 0.1278,
352
  "step": 490
353
  },
354
  {
355
  "epoch": 0.9689922480620154,
356
+ "grad_norm": 30.639806747436523,
357
  "learning_rate": 1.724137931034483e-06,
358
+ "loss": 0.1281,
359
  "step": 500
360
  },
361
  {
362
  "epoch": 0.9883720930232558,
363
+ "grad_norm": 51.52839660644531,
364
  "learning_rate": 6.46551724137931e-07,
365
+ "loss": 0.1279,
366
  "step": 510
367
  },
368
  {
369
  "epoch": 1.0,
370
+ "eval_accuracy": 0.9798934108527132,
371
+ "eval_loss": 0.05675136297941208,
372
+ "eval_runtime": 44.1069,
373
+ "eval_samples_per_second": 93.591,
374
+ "eval_steps_per_second": 11.699,
375
  "step": 516
376
  },
377
  {
378
  "epoch": 1.0,
379
  "step": 516,
380
  "total_flos": 5.373364651307827e+17,
381
+ "train_loss": 0.5028278545815815,
382
+ "train_runtime": 599.3147,
383
+ "train_samples_per_second": 27.548,
384
+ "train_steps_per_second": 0.861
385
  }
386
  ],
387
  "logging_steps": 10,