alyzbane commited on
Commit
cc2baeb
1 Parent(s): 88724c8

End of training

Browse files
Files changed (6) hide show
  1. README.md +15 -15
  2. all_results.json +14 -14
  3. config.json +1 -0
  4. eval_results.json +9 -9
  5. train_results.json +6 -6
  6. trainer_state.json +873 -399
README.md CHANGED
@@ -6,10 +6,10 @@ tags:
6
  datasets:
7
  - imagefolder
8
  metrics:
9
- - accuracy
10
- - f1
11
  - precision
12
  - recall
 
 
13
  model-index:
14
  - name: resnet-50-finetuned-FBark
15
  results:
@@ -23,18 +23,18 @@ model-index:
23
  split: train
24
  args: default
25
  metrics:
26
- - name: Accuracy
27
- type: accuracy
28
- value: 0.9906542056074766
29
- - name: F1
30
- type: f1
31
- value: 0.9922719141323793
32
  - name: Precision
33
  type: precision
34
- value: 0.990909090909091
35
  - name: Recall
36
  type: recall
37
- value: 0.9939393939393939
 
 
 
 
 
 
38
  ---
39
 
40
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -44,11 +44,11 @@ should probably proofread and complete it, then remove this comment. -->
44
 
45
  This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on the imagefolder dataset.
46
  It achieves the following results on the evaluation set:
47
- - Accuracy: 0.9907
48
- - F1: 0.9923
49
- - Loss: 0.0579
50
- - Precision: 0.9909
51
- - Recall: 0.9939
52
 
53
  ## Model description
54
 
 
6
  datasets:
7
  - imagefolder
8
  metrics:
 
 
9
  - precision
10
  - recall
11
+ - f1
12
+ - accuracy
13
  model-index:
14
  - name: resnet-50-finetuned-FBark
15
  results:
 
23
  split: train
24
  args: default
25
  metrics:
 
 
 
 
 
 
26
  - name: Precision
27
  type: precision
28
+ value: 0.9699498746867168
29
  - name: Recall
30
  type: recall
31
+ value: 0.9778787878787879
32
+ - name: F1
33
+ type: f1
34
+ value: 0.9734665458141067
35
+ - name: Accuracy
36
+ type: accuracy
37
+ value: 0.9719626168224299
38
  ---
39
 
40
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
44
 
45
  This model is a fine-tuned version of [microsoft/resnet-50](https://huggingface.co/microsoft/resnet-50) on the imagefolder dataset.
46
  It achieves the following results on the evaluation set:
47
+ - Loss: 0.1079
48
+ - Precision: 0.9699
49
+ - Recall: 0.9779
50
+ - F1: 0.9735
51
+ - Accuracy: 0.9720
52
 
53
  ## Model description
54
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 19.85,
3
- "eval_accuracy": 0.9906542056074766,
4
- "eval_f1": 0.9922719141323793,
5
- "eval_loss": 0.07943902164697647,
6
- "eval_precision": 0.990909090909091,
7
- "eval_recall": 0.9939393939393939,
8
- "eval_runtime": 36.1652,
9
- "eval_samples_per_second": 2.959,
10
- "eval_steps_per_second": 0.387,
11
- "total_flos": 1.7791424076806554e+17,
12
- "train_loss": 0.16752889706538274,
13
- "train_runtime": 3044.6611,
14
- "train_samples_per_second": 2.785,
15
- "train_steps_per_second": 0.085
16
  }
 
1
  {
2
+ "epoch": 34.34,
3
+ "eval_accuracy": 0.9719626168224299,
4
+ "eval_f1": 0.9734665458141067,
5
+ "eval_loss": 0.10789879411458969,
6
+ "eval_precision": 0.9699498746867168,
7
+ "eval_recall": 0.9778787878787879,
8
+ "eval_runtime": 41.2024,
9
+ "eval_samples_per_second": 2.597,
10
+ "eval_steps_per_second": 0.34,
11
+ "total_flos": 3.0926830773436416e+17,
12
+ "train_loss": 0.0,
13
+ "train_runtime": 0.0155,
14
+ "train_samples_per_second": 955313.811,
15
+ "train_steps_per_second": 29290.282
16
  }
config.json CHANGED
@@ -42,6 +42,7 @@
42
  "out_indices": [
43
  4
44
  ],
 
45
  "stage_names": [
46
  "stem",
47
  "stage1",
 
42
  "out_indices": [
43
  4
44
  ],
45
+ "problem_type": "single_label_classification",
46
  "stage_names": [
47
  "stem",
48
  "stage1",
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 19.85,
3
- "eval_accuracy": 0.9906542056074766,
4
- "eval_f1": 0.9922719141323793,
5
- "eval_loss": 0.07943902164697647,
6
- "eval_precision": 0.990909090909091,
7
- "eval_recall": 0.9939393939393939,
8
- "eval_runtime": 36.1652,
9
- "eval_samples_per_second": 2.959,
10
- "eval_steps_per_second": 0.387
11
  }
 
1
  {
2
+ "epoch": 34.34,
3
+ "eval_accuracy": 0.9719626168224299,
4
+ "eval_f1": 0.9734665458141067,
5
+ "eval_loss": 0.10789879411458969,
6
+ "eval_precision": 0.9699498746867168,
7
+ "eval_recall": 0.9778787878787879,
8
+ "eval_runtime": 41.2024,
9
+ "eval_samples_per_second": 2.597,
10
+ "eval_steps_per_second": 0.34
11
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 19.85,
3
- "total_flos": 1.7791424076806554e+17,
4
- "train_loss": 0.16752889706538274,
5
- "train_runtime": 3044.6611,
6
- "train_samples_per_second": 2.785,
7
- "train_steps_per_second": 0.085
8
  }
 
1
  {
2
+ "epoch": 34.34,
3
+ "total_flos": 3.0926830773436416e+17,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 0.0155,
6
+ "train_samples_per_second": 955313.811,
7
+ "train_steps_per_second": 29290.282
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9906542056074766,
3
- "best_model_checkpoint": "resnet-50-finetuned-FBark\\checkpoint-182",
4
- "epoch": 19.849056603773583,
5
  "eval_steps": 500,
6
- "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,639 +11,1113 @@
11
  {
12
  "epoch": 0.98,
13
  "step": 13,
14
- "train_accuracy": 0.44339622641509435,
15
- "train_f1": 0.30273443406045597,
16
- "train_loss": 1.3222538232803345,
17
- "train_precision": 0.4409161713465874,
18
- "train_recall": 0.40814882032667876,
19
- "train_runtime": 131.484,
20
- "train_samples_per_second": 3.225,
21
- "train_steps_per_second": 0.403
22
  },
23
  {
24
  "epoch": 0.98,
25
- "grad_norm": 2.9973506927490234,
26
- "learning_rate": 2.9999999999999997e-05,
27
- "loss": 1.4155,
28
  "step": 13
29
  },
30
  {
31
  "epoch": 0.98,
32
- "eval_accuracy": 0.38317757009345793,
33
- "eval_f1": 0.2502374169040836,
34
- "eval_loss": 1.2673313617706299,
35
- "eval_precision": 0.3522222222222222,
36
- "eval_recall": 0.3991341991341991,
37
- "eval_runtime": 47.8505,
38
- "eval_samples_per_second": 2.236,
39
- "eval_steps_per_second": 0.293,
40
  "step": 13
41
  },
42
  {
43
  "epoch": 1.96,
44
  "step": 26,
45
- "train_accuracy": 0.5094339622641509,
46
- "train_f1": 0.3768708281840832,
47
- "train_loss": 1.2979044914245605,
48
- "train_precision": 0.5660861330698287,
49
- "train_recall": 0.4677431408134941,
50
- "train_runtime": 148.3187,
51
- "train_samples_per_second": 2.859,
52
- "train_steps_per_second": 0.357
53
  },
54
  {
55
  "epoch": 1.96,
56
- "grad_norm": 1.2892597913742065,
57
- "learning_rate": 5.9999999999999995e-05,
58
- "loss": 1.389,
59
  "step": 26
60
  },
61
  {
62
  "epoch": 1.96,
63
- "eval_accuracy": 0.42990654205607476,
64
- "eval_f1": 0.31675399012689665,
65
- "eval_loss": 1.2493001222610474,
66
- "eval_precision": 0.3496434937611408,
67
- "eval_recall": 0.4454545454545455,
68
- "eval_runtime": 46.5066,
69
- "eval_samples_per_second": 2.301,
70
- "eval_steps_per_second": 0.301,
71
  "step": 26
72
  },
73
  {
74
  "epoch": 2.94,
75
  "step": 39,
76
- "train_accuracy": 0.4811320754716981,
77
- "train_f1": 0.3565462103231007,
78
- "train_loss": 1.2638133764266968,
79
- "train_precision": 0.35321937321937324,
80
- "train_recall": 0.44614604462474644,
81
- "train_runtime": 133.803,
82
- "train_samples_per_second": 3.169,
83
- "train_steps_per_second": 0.396
84
  },
85
  {
86
  "epoch": 2.94,
87
- "grad_norm": 1.521682620048523,
88
- "learning_rate": 8.999999999999999e-05,
89
- "loss": 1.3742,
90
  "step": 39
91
  },
92
  {
93
  "epoch": 2.94,
94
- "eval_accuracy": 0.4392523364485981,
95
- "eval_f1": 0.33554006968641115,
96
- "eval_loss": 1.223555326461792,
97
- "eval_precision": 0.3334061930783242,
98
- "eval_recall": 0.4536796536796537,
99
- "eval_runtime": 47.089,
100
- "eval_samples_per_second": 2.272,
101
- "eval_steps_per_second": 0.297,
102
  "step": 39
103
  },
104
  {
105
  "epoch": 4.0,
106
  "step": 53,
107
- "train_accuracy": 0.5683962264150944,
108
- "train_f1": 0.4573118754465906,
109
- "train_loss": 1.220451831817627,
110
- "train_precision": 0.6263221242541064,
111
- "train_recall": 0.5256962525336195,
112
- "train_runtime": 123.6293,
113
- "train_samples_per_second": 3.43,
114
- "train_steps_per_second": 0.429
115
  },
116
  {
117
  "epoch": 4.0,
118
- "grad_norm": 1.533220648765564,
119
- "learning_rate": 0.0001223076923076923,
120
- "loss": 1.2592,
121
  "step": 53
122
  },
123
  {
124
  "epoch": 4.0,
125
- "eval_accuracy": 0.5233644859813084,
126
- "eval_f1": 0.459322365844105,
127
- "eval_loss": 1.1768313646316528,
128
- "eval_precision": 0.6458215962441315,
129
- "eval_recall": 0.5359683794466403,
130
- "eval_runtime": 52.0476,
131
- "eval_samples_per_second": 2.056,
132
- "eval_steps_per_second": 0.269,
133
  "step": 53
134
  },
135
  {
136
  "epoch": 4.98,
137
  "step": 66,
138
- "train_accuracy": 0.5990566037735849,
139
- "train_f1": 0.49108922737322597,
140
- "train_loss": 1.1574139595031738,
141
- "train_precision": 0.6493666135599222,
142
- "train_recall": 0.55391404130241,
143
- "train_runtime": 122.643,
144
- "train_samples_per_second": 3.457,
145
- "train_steps_per_second": 0.432
146
  },
147
  {
148
  "epoch": 4.98,
149
- "grad_norm": 1.2271511554718018,
150
- "learning_rate": 0.00015230769230769228,
151
- "loss": 1.287,
152
  "step": 66
153
  },
154
  {
155
  "epoch": 4.98,
156
- "eval_accuracy": 0.5607476635514018,
157
- "eval_f1": 0.48464696223316917,
158
- "eval_loss": 1.1236993074417114,
159
- "eval_precision": 0.6436363636363637,
160
- "eval_recall": 0.5723320158102767,
161
- "eval_runtime": 41.452,
162
- "eval_samples_per_second": 2.581,
163
- "eval_steps_per_second": 0.338,
164
  "step": 66
165
  },
166
  {
167
  "epoch": 5.96,
168
  "step": 79,
169
- "train_accuracy": 0.6226415094339622,
170
- "train_f1": 0.5420012941762071,
171
- "train_loss": 1.0438413619995117,
172
- "train_precision": 0.8243247706573527,
173
- "train_recall": 0.5806628230445157,
174
- "train_runtime": 120.0289,
175
- "train_samples_per_second": 3.532,
176
- "train_steps_per_second": 0.442
177
  },
178
  {
179
  "epoch": 5.96,
180
- "grad_norm": 1.7175101041793823,
181
- "learning_rate": 0.00018230769230769228,
182
- "loss": 1.2337,
183
  "step": 79
184
  },
185
  {
186
  "epoch": 5.96,
187
- "eval_accuracy": 0.5981308411214953,
188
- "eval_f1": 0.5485250474059656,
189
- "eval_loss": 1.0242024660110474,
190
- "eval_precision": 0.865625,
191
- "eval_recall": 0.6083380387728214,
192
- "eval_runtime": 45.0579,
193
- "eval_samples_per_second": 2.375,
194
- "eval_steps_per_second": 0.311,
195
  "step": 79
196
  },
197
  {
198
  "epoch": 6.94,
199
  "step": 92,
200
- "train_accuracy": 0.6745283018867925,
201
- "train_f1": 0.6334819902275806,
202
- "train_loss": 0.935691237449646,
203
- "train_precision": 0.8151341081504686,
204
- "train_recall": 0.6419837529777849,
205
- "train_runtime": 130.3177,
206
- "train_samples_per_second": 3.254,
207
  "train_steps_per_second": 0.407
208
  },
209
  {
210
  "epoch": 6.94,
211
- "grad_norm": 1.9299957752227783,
212
- "learning_rate": 0.0002123076923076923,
213
- "loss": 1.1327,
214
  "step": 92
215
  },
216
  {
217
  "epoch": 6.94,
218
- "eval_accuracy": 0.7102803738317757,
219
- "eval_f1": 0.7062065516379095,
220
- "eval_loss": 0.9071274995803833,
221
- "eval_precision": 0.8466666666666667,
222
- "eval_recall": 0.7193111236589498,
223
- "eval_runtime": 42.5223,
224
- "eval_samples_per_second": 2.516,
225
- "eval_steps_per_second": 0.329,
226
  "step": 92
227
  },
228
  {
229
  "epoch": 8.0,
230
  "step": 106,
231
- "train_accuracy": 0.7971698113207547,
232
- "train_f1": 0.7766198391591397,
233
- "train_loss": 0.7271670699119568,
234
- "train_precision": 0.8651733397747755,
235
- "train_recall": 0.7787121922468249,
236
- "train_runtime": 114.2154,
237
- "train_samples_per_second": 3.712,
238
- "train_steps_per_second": 0.464
239
  },
240
  {
241
  "epoch": 8.0,
242
- "grad_norm": 2.2226734161376953,
243
- "learning_rate": 0.0002446153846153846,
244
- "loss": 0.9421,
245
  "step": 106
246
  },
247
  {
248
  "epoch": 8.0,
249
- "eval_accuracy": 0.7757009345794392,
250
- "eval_f1": 0.7750786554065242,
251
- "eval_loss": 0.7316702008247375,
252
- "eval_precision": 0.8393822843822842,
253
- "eval_recall": 0.78223226049313,
254
- "eval_runtime": 37.8971,
255
- "eval_samples_per_second": 2.823,
256
- "eval_steps_per_second": 0.369,
257
  "step": 106
258
  },
259
  {
260
  "epoch": 8.98,
261
  "step": 119,
262
- "train_accuracy": 0.8867924528301887,
263
- "train_f1": 0.8861109763599291,
264
- "train_loss": 0.5811343193054199,
265
- "train_precision": 0.9043399022853599,
266
- "train_recall": 0.8790563087733421,
267
- "train_runtime": 114.7718,
268
- "train_samples_per_second": 3.694,
269
- "train_steps_per_second": 0.462
270
  },
271
  {
272
  "epoch": 8.98,
273
- "grad_norm": 2.4611976146698,
274
- "learning_rate": 0.0002746153846153846,
275
- "loss": 0.8662,
276
  "step": 119
277
  },
278
  {
279
  "epoch": 8.98,
280
- "eval_accuracy": 0.8785046728971962,
281
- "eval_f1": 0.8814210982815635,
282
- "eval_loss": 0.5671026110649109,
283
- "eval_precision": 0.8899470899470898,
284
- "eval_recall": 0.880764163372859,
285
- "eval_runtime": 36.298,
286
- "eval_samples_per_second": 2.948,
287
- "eval_steps_per_second": 0.386,
288
  "step": 119
289
  },
290
  {
291
  "epoch": 9.96,
292
  "step": 132,
293
- "train_accuracy": 0.9316037735849056,
294
- "train_f1": 0.9311136538837689,
295
- "train_loss": 0.40401148796081543,
296
- "train_precision": 0.9336201141469944,
297
- "train_recall": 0.9344224750671604,
298
- "train_runtime": 117.7535,
299
- "train_samples_per_second": 3.601,
300
- "train_steps_per_second": 0.45
301
  },
302
  {
303
  "epoch": 9.96,
304
- "grad_norm": 2.956929922103882,
305
- "learning_rate": 0.0002994871794871795,
306
- "loss": 0.7054,
307
  "step": 132
308
  },
309
  {
310
  "epoch": 9.96,
311
- "eval_accuracy": 0.9158878504672897,
312
- "eval_f1": 0.9176957026713124,
313
- "eval_loss": 0.3391460180282593,
314
- "eval_precision": 0.922962962962963,
315
- "eval_recall": 0.9159796725014117,
316
- "eval_runtime": 36.2906,
317
- "eval_samples_per_second": 2.948,
318
- "eval_steps_per_second": 0.386,
319
  "step": 132
320
  },
321
  {
322
  "epoch": 10.94,
323
  "step": 145,
324
- "train_accuracy": 0.9410377358490566,
325
- "train_f1": 0.9417046133414193,
326
- "train_loss": 0.2951917350292206,
327
- "train_precision": 0.9450684559692762,
328
- "train_recall": 0.9400955207310657,
329
- "train_runtime": 119.8783,
330
- "train_samples_per_second": 3.537,
331
- "train_steps_per_second": 0.442
332
  },
333
  {
334
  "epoch": 10.94,
335
- "grad_norm": 2.339923143386841,
336
- "learning_rate": 0.00029615384615384616,
337
- "loss": 0.595,
338
  "step": 145
339
  },
340
  {
341
  "epoch": 10.94,
342
- "eval_accuracy": 0.9626168224299065,
343
- "eval_f1": 0.9627651607231869,
344
- "eval_loss": 0.24373753368854523,
345
- "eval_precision": 0.9635610766045548,
346
- "eval_recall": 0.9639939770374552,
347
- "eval_runtime": 38.7327,
348
- "eval_samples_per_second": 2.763,
349
- "eval_steps_per_second": 0.361,
350
  "step": 145
351
  },
352
  {
353
  "epoch": 12.0,
354
  "step": 159,
355
- "train_accuracy": 0.9127358490566038,
356
- "train_f1": 0.9180456649072474,
357
- "train_loss": 0.27252715826034546,
358
- "train_precision": 0.933049846444602,
359
- "train_recall": 0.9085866769706422,
360
- "train_runtime": 116.9422,
361
- "train_samples_per_second": 3.626,
362
- "train_steps_per_second": 0.453
363
  },
364
  {
365
  "epoch": 12.0,
366
- "grad_norm": 4.72435188293457,
367
- "learning_rate": 0.00029256410256410254,
368
- "loss": 0.4509,
369
  "step": 159
370
  },
371
  {
372
  "epoch": 12.0,
373
- "eval_accuracy": 0.9252336448598131,
374
- "eval_f1": 0.9260400996325433,
375
- "eval_loss": 0.22485339641571045,
376
- "eval_precision": 0.9322943722943723,
377
- "eval_recall": 0.9283832109919066,
378
- "eval_runtime": 35.9322,
379
- "eval_samples_per_second": 2.978,
380
- "eval_steps_per_second": 0.39,
381
  "step": 159
382
  },
383
  {
384
  "epoch": 12.98,
385
- "step": 169,
386
- "train_accuracy": 0.9363207547169812,
387
- "train_f1": 0.9364699926202164,
388
- "train_loss": 0.23557166755199432,
389
- "train_precision": 0.9409715380405036,
390
- "train_recall": 0.9348327849415139,
391
- "train_runtime": 133.051,
392
- "train_samples_per_second": 3.187,
393
- "train_steps_per_second": 0.398
394
  },
395
  {
396
  "epoch": 12.98,
397
- "grad_norm": 4.02577543258667,
398
- "learning_rate": 0.00011666666666666665,
399
- "loss": 0.4954,
400
- "step": 169
401
  },
402
  {
403
  "epoch": 12.98,
404
- "eval_accuracy": 0.9813084112149533,
405
- "eval_f1": 0.9844101823682084,
406
- "eval_loss": 0.09772800654172897,
407
- "eval_precision": 0.9848484848484848,
408
- "eval_recall": 0.9844155844155844,
409
- "eval_runtime": 46.2598,
410
- "eval_samples_per_second": 2.313,
411
- "eval_steps_per_second": 0.303,
412
- "step": 169
413
  },
414
  {
415
  "epoch": 13.96,
416
- "step": 182,
417
- "train_accuracy": 0.9693396226415094,
418
- "train_f1": 0.9713518400105837,
419
- "train_loss": 0.16475237905979156,
420
- "train_precision": 0.9745377270610154,
421
- "train_recall": 0.9693937925419673,
422
- "train_runtime": 138.0499,
423
- "train_samples_per_second": 3.071,
424
- "train_steps_per_second": 0.384
425
  },
426
  {
427
  "epoch": 13.96,
428
- "grad_norm": 2.480182647705078,
429
- "learning_rate": 9.999999999999999e-05,
430
- "loss": 0.4842,
431
- "step": 182
432
  },
433
  {
434
  "epoch": 13.96,
435
- "eval_accuracy": 0.9906542056074766,
436
- "eval_f1": 0.9922719141323793,
437
- "eval_loss": 0.07943902164697647,
438
- "eval_precision": 0.990909090909091,
439
- "eval_recall": 0.9939393939393939,
440
- "eval_runtime": 60.5689,
441
- "eval_samples_per_second": 1.767,
442
- "eval_steps_per_second": 0.231,
443
- "step": 182
444
  },
445
  {
446
  "epoch": 14.94,
447
- "step": 195,
448
  "train_accuracy": 0.9716981132075472,
449
- "train_f1": 0.9719681475881605,
450
- "train_loss": 0.1421472579240799,
451
- "train_precision": 0.9732013799086496,
452
- "train_recall": 0.9717206659817743,
453
- "train_runtime": 161.6921,
454
- "train_samples_per_second": 2.622,
455
- "train_steps_per_second": 0.328
456
  },
457
  {
458
  "epoch": 14.94,
459
- "grad_norm": 3.177818775177002,
460
- "learning_rate": 8.333333333333333e-05,
461
- "loss": 0.4194,
462
- "step": 195
463
  },
464
  {
465
  "epoch": 14.94,
466
  "eval_accuracy": 0.9906542056074766,
467
  "eval_f1": 0.9922719141323793,
468
- "eval_loss": 0.08730577677488327,
469
  "eval_precision": 0.990909090909091,
470
  "eval_recall": 0.9939393939393939,
471
- "eval_runtime": 47.0651,
472
- "eval_samples_per_second": 2.273,
473
- "eval_steps_per_second": 0.297,
474
- "step": 195
475
  },
476
  {
477
  "epoch": 16.0,
478
- "step": 209,
479
- "train_accuracy": 0.964622641509434,
480
- "train_f1": 0.9654786147790538,
481
- "train_loss": 0.170837864279747,
482
- "train_precision": 0.9662175617420704,
483
- "train_recall": 0.9652730803864843,
484
- "train_runtime": 122.9869,
485
- "train_samples_per_second": 3.448,
486
- "train_steps_per_second": 0.431
487
  },
488
  {
489
  "epoch": 16.0,
490
- "grad_norm": 2.836935520172119,
491
- "learning_rate": 6.538461538461539e-05,
492
- "loss": 0.4353,
493
- "step": 209
494
  },
495
  {
496
  "epoch": 16.0,
497
  "eval_accuracy": 0.9906542056074766,
498
  "eval_f1": 0.9922719141323793,
499
- "eval_loss": 0.07477138936519623,
500
  "eval_precision": 0.990909090909091,
501
  "eval_recall": 0.9939393939393939,
502
- "eval_runtime": 45.8048,
503
- "eval_samples_per_second": 2.336,
504
- "eval_steps_per_second": 0.306,
505
- "step": 209
506
  },
507
  {
508
  "epoch": 16.98,
509
- "step": 222,
510
- "train_accuracy": 0.9693396226415094,
511
- "train_f1": 0.9698753879001953,
512
- "train_loss": 0.13738253712654114,
513
- "train_precision": 0.9698721656123299,
514
- "train_recall": 0.9699632431180802,
515
- "train_runtime": 120.4492,
516
- "train_samples_per_second": 3.52,
517
- "train_steps_per_second": 0.44
518
  },
519
  {
520
  "epoch": 16.98,
521
- "grad_norm": 2.6075973510742188,
522
- "learning_rate": 4.871794871794872e-05,
523
- "loss": 0.3996,
524
- "step": 222
525
  },
526
  {
527
  "epoch": 16.98,
528
  "eval_accuracy": 0.9906542056074766,
529
  "eval_f1": 0.9922719141323793,
530
- "eval_loss": 0.06090036779642105,
531
  "eval_precision": 0.990909090909091,
532
  "eval_recall": 0.9939393939393939,
533
- "eval_runtime": 46.0048,
534
- "eval_samples_per_second": 2.326,
535
- "eval_steps_per_second": 0.304,
536
- "step": 222
537
  },
538
  {
539
  "epoch": 17.96,
540
- "step": 235,
541
- "train_accuracy": 0.9669811320754716,
542
- "train_f1": 0.9669883931291977,
543
- "train_loss": 0.15114478766918182,
544
- "train_precision": 0.9689529722559259,
545
- "train_recall": 0.9654587281320716,
546
- "train_runtime": 117.4904,
547
- "train_samples_per_second": 3.609,
548
- "train_steps_per_second": 0.451
549
  },
550
  {
551
  "epoch": 17.96,
552
- "grad_norm": 3.3314599990844727,
553
- "learning_rate": 3.2051282051282044e-05,
554
- "loss": 0.4445,
555
- "step": 235
556
  },
557
  {
558
  "epoch": 17.96,
559
  "eval_accuracy": 0.9906542056074766,
560
  "eval_f1": 0.9922719141323793,
561
- "eval_loss": 0.06219491362571716,
562
  "eval_precision": 0.990909090909091,
563
  "eval_recall": 0.9939393939393939,
564
- "eval_runtime": 44.1393,
565
- "eval_samples_per_second": 2.424,
566
- "eval_steps_per_second": 0.317,
567
- "step": 235
568
  },
569
  {
570
  "epoch": 18.94,
571
- "step": 248,
572
- "train_accuracy": 0.9740566037735849,
573
- "train_f1": 0.973824007234904,
574
- "train_loss": 0.13619276881217957,
575
- "train_precision": 0.9751913290746111,
576
- "train_recall": 0.9728311732620997,
577
- "train_runtime": 116.7219,
578
- "train_samples_per_second": 3.633,
579
- "train_steps_per_second": 0.454
580
  },
581
  {
582
  "epoch": 18.94,
583
- "grad_norm": 3.210602045059204,
584
- "learning_rate": 1.5384615384615384e-05,
585
- "loss": 0.3838,
586
- "step": 248
587
  },
588
  {
589
  "epoch": 18.94,
590
  "eval_accuracy": 0.9906542056074766,
591
  "eval_f1": 0.9922719141323793,
592
- "eval_loss": 0.0594821497797966,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
  "eval_precision": 0.990909090909091,
594
  "eval_recall": 0.9939393939393939,
595
- "eval_runtime": 36.336,
596
- "eval_samples_per_second": 2.945,
597
- "eval_steps_per_second": 0.385,
598
- "step": 248
599
- },
600
- {
601
- "epoch": 19.85,
602
- "step": 260,
603
- "train_accuracy": 0.9599056603773585,
604
- "train_f1": 0.9607465386599348,
605
- "train_loss": 0.16524513065814972,
606
- "train_precision": 0.9620981874857844,
607
- "train_recall": 0.9597895488701212,
608
- "train_runtime": 115.5845,
609
- "train_samples_per_second": 3.668,
610
- "train_steps_per_second": 0.459
611
- },
612
- {
613
- "epoch": 19.85,
614
- "grad_norm": 4.980319976806641,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  "learning_rate": 0.0,
616
- "loss": 0.4,
617
- "step": 260
618
  },
619
  {
620
- "epoch": 19.85,
621
  "eval_accuracy": 0.9906542056074766,
622
  "eval_f1": 0.9922719141323793,
623
- "eval_loss": 0.06939452886581421,
624
  "eval_precision": 0.990909090909091,
625
  "eval_recall": 0.9939393939393939,
626
- "eval_runtime": 35.1775,
627
- "eval_samples_per_second": 3.042,
628
- "eval_steps_per_second": 0.398,
629
- "step": 260
630
- },
631
- {
632
- "epoch": 19.85,
633
- "step": 260,
634
- "total_flos": 1.7791424076806554e+17,
635
- "train_loss": 0.16752889706538274,
636
- "train_runtime": 3044.6611,
637
- "train_samples_per_second": 2.785,
638
- "train_steps_per_second": 0.085
 
 
 
 
 
 
 
 
 
639
  }
640
  ],
641
  "logging_steps": 50,
642
- "max_steps": 260,
643
  "num_input_tokens_seen": 0,
644
- "num_train_epochs": 20,
645
  "save_steps": 500,
646
- "total_flos": 1.7791424076806554e+17,
647
  "train_batch_size": 8,
648
  "trial_name": null,
649
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9906542056074766,
3
+ "best_model_checkpoint": "resnet-50-finetuned-FBark\\checkpoint-198",
4
+ "epoch": 34.339622641509436,
5
  "eval_steps": 500,
6
+ "global_step": 455,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.98,
13
  "step": 13,
14
+ "train_accuracy": 0.18867924528301888,
15
+ "train_f1": 0.07866239279216843,
16
+ "train_loss": 1.6040071249008179,
17
+ "train_precision": 0.10334148329258355,
18
+ "train_recall": 0.20698380566801616,
19
+ "train_runtime": 132.5495,
20
+ "train_samples_per_second": 3.199,
21
+ "train_steps_per_second": 0.4
22
  },
23
  {
24
  "epoch": 0.98,
25
+ "grad_norm": 0.8319346904754639,
26
+ "learning_rate": 8.478260869565217e-05,
27
+ "loss": 1.6424,
28
  "step": 13
29
  },
30
  {
31
  "epoch": 0.98,
32
+ "eval_accuracy": 0.2336448598130841,
33
+ "eval_f1": 0.10987810004203447,
34
+ "eval_loss": 1.5959796905517578,
35
+ "eval_precision": 0.1749174917491749,
36
+ "eval_recall": 0.22424242424242422,
37
+ "eval_runtime": 39.1296,
38
+ "eval_samples_per_second": 2.735,
39
+ "eval_steps_per_second": 0.358,
40
  "step": 13
41
  },
42
  {
43
  "epoch": 1.96,
44
  "step": 26,
45
+ "train_accuracy": 0.33962264150943394,
46
+ "train_f1": 0.24495658674026793,
47
+ "train_loss": 1.5701098442077637,
48
+ "train_precision": 0.3021095248242063,
49
+ "train_recall": 0.3113901059286784,
50
+ "train_runtime": 127.6225,
51
+ "train_samples_per_second": 3.322,
52
+ "train_steps_per_second": 0.415
53
  },
54
  {
55
  "epoch": 1.96,
56
+ "grad_norm": 1.3721247911453247,
57
+ "learning_rate": 0.00016956521739130433,
58
+ "loss": 1.621,
59
  "step": 26
60
  },
61
  {
62
  "epoch": 1.96,
63
+ "eval_accuracy": 0.411214953271028,
64
+ "eval_f1": 0.26165216896924215,
65
+ "eval_loss": 1.546158790588379,
66
+ "eval_precision": 0.3088888888888889,
67
+ "eval_recall": 0.3116883116883117,
68
+ "eval_runtime": 39.3684,
69
+ "eval_samples_per_second": 2.718,
70
+ "eval_steps_per_second": 0.356,
71
  "step": 26
72
  },
73
  {
74
  "epoch": 2.94,
75
  "step": 39,
76
+ "train_accuracy": 0.37264150943396224,
77
+ "train_f1": 0.2637971738836966,
78
+ "train_loss": 1.5183203220367432,
79
+ "train_precision": 0.35881109762129587,
80
+ "train_recall": 0.34996256447229773,
81
+ "train_runtime": 132.7347,
82
+ "train_samples_per_second": 3.194,
83
+ "train_steps_per_second": 0.399
84
  },
85
  {
86
  "epoch": 2.94,
87
+ "grad_norm": 1.4928213357925415,
88
+ "learning_rate": 0.00025434782608695647,
89
+ "loss": 1.567,
90
  "step": 39
91
  },
92
  {
93
  "epoch": 2.94,
94
+ "eval_accuracy": 0.4766355140186916,
95
+ "eval_f1": 0.30495432955791013,
96
+ "eval_loss": 1.4607292413711548,
97
+ "eval_precision": 0.3638922888616891,
98
+ "eval_recall": 0.3748917748917749,
99
+ "eval_runtime": 41.5534,
100
+ "eval_samples_per_second": 2.575,
101
+ "eval_steps_per_second": 0.337,
102
  "step": 39
103
  },
104
  {
105
  "epoch": 4.0,
106
  "step": 53,
107
+ "train_accuracy": 0.4386792452830189,
108
+ "train_f1": 0.3491466500711846,
109
+ "train_loss": 1.346737027168274,
110
+ "train_precision": 0.7565600797484855,
111
+ "train_recall": 0.41960074270933767,
112
+ "train_runtime": 137.4133,
113
+ "train_samples_per_second": 3.086,
114
+ "train_steps_per_second": 0.386
115
  },
116
  {
117
  "epoch": 4.0,
118
+ "grad_norm": 1.9480384588241577,
119
+ "learning_rate": 0.00029486552567237163,
120
+ "loss": 1.357,
121
  "step": 53
122
  },
123
  {
124
  "epoch": 4.0,
125
+ "eval_accuracy": 0.514018691588785,
126
+ "eval_f1": 0.38211575211575216,
127
+ "eval_loss": 1.2584657669067383,
128
+ "eval_precision": 0.8378205128205127,
129
+ "eval_recall": 0.4251082251082251,
130
+ "eval_runtime": 43.051,
131
+ "eval_samples_per_second": 2.485,
132
+ "eval_steps_per_second": 0.325,
133
  "step": 53
134
  },
135
  {
136
  "epoch": 4.98,
137
  "step": 66,
138
+ "train_accuracy": 0.6957547169811321,
139
+ "train_f1": 0.6891574214359025,
140
+ "train_loss": 1.138260841369629,
141
+ "train_precision": 0.8253012477718361,
142
+ "train_recall": 0.6805251227537071,
143
+ "train_runtime": 126.2306,
144
+ "train_samples_per_second": 3.359,
145
+ "train_steps_per_second": 0.42
146
  },
147
  {
148
  "epoch": 4.98,
149
+ "grad_norm": 1.9529035091400146,
150
+ "learning_rate": 0.0002853300733496332,
151
+ "loss": 1.3203,
152
  "step": 66
153
  },
154
  {
155
  "epoch": 4.98,
156
+ "eval_accuracy": 0.7476635514018691,
157
+ "eval_f1": 0.7225305110805734,
158
+ "eval_loss": 1.0523829460144043,
159
+ "eval_precision": 0.8176507936507935,
160
+ "eval_recall": 0.7064935064935065,
161
+ "eval_runtime": 39.1299,
162
+ "eval_samples_per_second": 2.734,
163
+ "eval_steps_per_second": 0.358,
164
  "step": 66
165
  },
166
  {
167
  "epoch": 5.96,
168
  "step": 79,
169
+ "train_accuracy": 0.7594339622641509,
170
+ "train_f1": 0.7384580979394952,
171
+ "train_loss": 0.8829485774040222,
172
+ "train_precision": 0.86050056869729,
173
+ "train_recall": 0.7432874367985035,
174
+ "train_runtime": 133.9565,
175
+ "train_samples_per_second": 3.165,
176
+ "train_steps_per_second": 0.396
177
  },
178
  {
179
  "epoch": 5.96,
180
+ "grad_norm": 2.4536993503570557,
181
+ "learning_rate": 0.00027579462102689484,
182
+ "loss": 1.1706,
183
  "step": 79
184
  },
185
  {
186
  "epoch": 5.96,
187
+ "eval_accuracy": 0.794392523364486,
188
+ "eval_f1": 0.7846877954646693,
189
+ "eval_loss": 0.8008124828338623,
190
+ "eval_precision": 0.8854145854145855,
191
+ "eval_recall": 0.767965367965368,
192
+ "eval_runtime": 40.731,
193
+ "eval_samples_per_second": 2.627,
194
+ "eval_steps_per_second": 0.344,
195
  "step": 79
196
  },
197
  {
198
  "epoch": 6.94,
199
  "step": 92,
200
+ "train_accuracy": 0.8632075471698113,
201
+ "train_f1": 0.8590685733770105,
202
+ "train_loss": 0.7464644312858582,
203
+ "train_precision": 0.892835269329224,
204
+ "train_recall": 0.8548507199297516,
205
+ "train_runtime": 130.0886,
206
+ "train_samples_per_second": 3.259,
207
  "train_steps_per_second": 0.407
208
  },
209
  {
210
  "epoch": 6.94,
211
+ "grad_norm": 2.5925121307373047,
212
+ "learning_rate": 0.00026625916870415647,
213
+ "loss": 0.9929,
214
  "step": 92
215
  },
216
  {
217
  "epoch": 6.94,
218
+ "eval_accuracy": 0.9158878504672897,
219
+ "eval_f1": 0.9236075036075035,
220
+ "eval_loss": 0.6253050565719604,
221
+ "eval_precision": 0.9368429298864083,
222
+ "eval_recall": 0.9212121212121211,
223
+ "eval_runtime": 38.4775,
224
+ "eval_samples_per_second": 2.781,
225
+ "eval_steps_per_second": 0.364,
226
  "step": 92
227
  },
228
  {
229
  "epoch": 8.0,
230
  "step": 106,
231
+ "train_accuracy": 0.9080188679245284,
232
+ "train_f1": 0.9066600370668294,
233
+ "train_loss": 0.5274814963340759,
234
+ "train_precision": 0.9124982372811825,
235
+ "train_recall": 0.9042126570890489,
236
+ "train_runtime": 129.4721,
237
+ "train_samples_per_second": 3.275,
238
+ "train_steps_per_second": 0.409
239
  },
240
  {
241
  "epoch": 8.0,
242
+ "grad_norm": 2.86348032951355,
243
+ "learning_rate": 0.0002559902200488997,
244
+ "loss": 0.7633,
245
  "step": 106
246
  },
247
  {
248
  "epoch": 8.0,
249
+ "eval_accuracy": 0.9345794392523364,
250
+ "eval_f1": 0.9342456932845948,
251
+ "eval_loss": 0.46806150674819946,
252
+ "eval_precision": 0.9339420289855072,
253
+ "eval_recall": 0.9437229437229437,
254
+ "eval_runtime": 39.4048,
255
+ "eval_samples_per_second": 2.715,
256
+ "eval_steps_per_second": 0.355,
257
  "step": 106
258
  },
259
  {
260
  "epoch": 8.98,
261
  "step": 119,
262
+ "train_accuracy": 0.9363207547169812,
263
+ "train_f1": 0.9358357324202157,
264
+ "train_loss": 0.46290820837020874,
265
+ "train_precision": 0.938202392067757,
266
+ "train_recall": 0.9342575372358033,
267
+ "train_runtime": 133.0687,
268
+ "train_samples_per_second": 3.186,
269
+ "train_steps_per_second": 0.398
270
  },
271
  {
272
  "epoch": 8.98,
273
+ "grad_norm": 3.642646074295044,
274
+ "learning_rate": 0.00024645476772616135,
275
+ "loss": 0.6367,
276
  "step": 119
277
  },
278
  {
279
  "epoch": 8.98,
280
+ "eval_accuracy": 0.9158878504672897,
281
+ "eval_f1": 0.9145165945165944,
282
+ "eval_loss": 0.3800387382507324,
283
+ "eval_precision": 0.912056277056277,
284
+ "eval_recall": 0.9194805194805195,
285
+ "eval_runtime": 48.5414,
286
+ "eval_samples_per_second": 2.204,
287
+ "eval_steps_per_second": 0.288,
288
  "step": 119
289
  },
290
  {
291
  "epoch": 9.96,
292
  "step": 132,
293
+ "train_accuracy": 0.9410377358490566,
294
+ "train_f1": 0.9413105716667156,
295
+ "train_loss": 0.36001139879226685,
296
+ "train_precision": 0.941837807815579,
297
+ "train_recall": 0.9410602738360391,
298
+ "train_runtime": 133.0768,
299
+ "train_samples_per_second": 3.186,
300
+ "train_steps_per_second": 0.398
301
  },
302
  {
303
  "epoch": 9.96,
304
+ "grad_norm": 2.934262275695801,
305
+ "learning_rate": 0.00023691931540342298,
306
+ "loss": 0.5834,
307
  "step": 132
308
  },
309
  {
310
  "epoch": 9.96,
311
+ "eval_accuracy": 0.9532710280373832,
312
+ "eval_f1": 0.9551136455716718,
313
+ "eval_loss": 0.26904991269111633,
314
+ "eval_precision": 0.959457478005865,
315
+ "eval_recall": 0.9541125541125541,
316
+ "eval_runtime": 42.7878,
317
+ "eval_samples_per_second": 2.501,
318
+ "eval_steps_per_second": 0.327,
319
  "step": 132
320
  },
321
  {
322
  "epoch": 10.94,
323
  "step": 145,
324
+ "train_accuracy": 0.9504716981132075,
325
+ "train_f1": 0.9503020748526174,
326
+ "train_loss": 0.25279200077056885,
327
+ "train_precision": 0.9508748114630468,
328
+ "train_recall": 0.9503638914618925,
329
+ "train_runtime": 134.3359,
330
+ "train_samples_per_second": 3.156,
331
+ "train_steps_per_second": 0.395
332
  },
333
  {
334
  "epoch": 10.94,
335
+ "grad_norm": 3.635103464126587,
336
+ "learning_rate": 0.00022738386308068459,
337
+ "loss": 0.4842,
338
  "step": 145
339
  },
340
  {
341
  "epoch": 10.94,
342
+ "eval_accuracy": 0.9813084112149533,
343
+ "eval_f1": 0.984659090909091,
344
+ "eval_loss": 0.16999471187591553,
345
+ "eval_precision": 0.9826086956521738,
346
+ "eval_recall": 0.9878787878787879,
347
+ "eval_runtime": 49.1618,
348
+ "eval_samples_per_second": 2.176,
349
+ "eval_steps_per_second": 0.285,
350
  "step": 145
351
  },
352
  {
353
  "epoch": 12.0,
354
  "step": 159,
355
+ "train_accuracy": 0.9693396226415094,
356
+ "train_f1": 0.9695757850179305,
357
+ "train_loss": 0.1881800889968872,
358
+ "train_precision": 0.9693384564611929,
359
+ "train_recall": 0.9709818221559601,
360
+ "train_runtime": 128.8721,
361
+ "train_samples_per_second": 3.29,
362
+ "train_steps_per_second": 0.411
363
  },
364
  {
365
  "epoch": 12.0,
366
+ "grad_norm": 2.419642925262451,
367
+ "learning_rate": 0.00021711491442542784,
368
+ "loss": 0.4302,
369
  "step": 159
370
  },
371
  {
372
  "epoch": 12.0,
373
+ "eval_accuracy": 0.9626168224299065,
374
+ "eval_f1": 0.9676432095036744,
375
+ "eval_loss": 0.17427879571914673,
376
+ "eval_precision": 0.9648221343873518,
377
+ "eval_recall": 0.9722943722943723,
378
+ "eval_runtime": 41.3417,
379
+ "eval_samples_per_second": 2.588,
380
+ "eval_steps_per_second": 0.339,
381
  "step": 159
382
  },
383
  {
384
  "epoch": 12.98,
385
+ "step": 172,
386
+ "train_accuracy": 0.964622641509434,
387
+ "train_f1": 0.9651368628644732,
388
+ "train_loss": 0.1646902710199356,
389
+ "train_precision": 0.9645891898165841,
390
+ "train_recall": 0.9665469535253202,
391
+ "train_runtime": 134.1065,
392
+ "train_samples_per_second": 3.162,
393
+ "train_steps_per_second": 0.395
394
  },
395
  {
396
  "epoch": 12.98,
397
+ "grad_norm": 4.919209003448486,
398
+ "learning_rate": 0.00020757946210268947,
399
+ "loss": 0.4422,
400
+ "step": 172
401
  },
402
  {
403
  "epoch": 12.98,
404
+ "eval_accuracy": 0.9719626168224299,
405
+ "eval_f1": 0.9771428571428572,
406
+ "eval_loss": 0.13857078552246094,
407
+ "eval_precision": 0.975,
408
+ "eval_recall": 0.9818181818181818,
409
+ "eval_runtime": 39.4778,
410
+ "eval_samples_per_second": 2.71,
411
+ "eval_steps_per_second": 0.355,
412
+ "step": 172
413
  },
414
  {
415
  "epoch": 13.96,
416
+ "step": 185,
417
+ "train_accuracy": 0.9716981132075472,
418
+ "train_f1": 0.9709900945487153,
419
+ "train_loss": 0.13838660717010498,
420
+ "train_precision": 0.9710717151425976,
421
+ "train_recall": 0.9710504067284639,
422
+ "train_runtime": 130.0755,
423
+ "train_samples_per_second": 3.26,
424
+ "train_steps_per_second": 0.407
425
  },
426
  {
427
  "epoch": 13.96,
428
+ "grad_norm": 4.021721839904785,
429
+ "learning_rate": 0.0001980440097799511,
430
+ "loss": 0.4237,
431
+ "step": 185
432
  },
433
  {
434
  "epoch": 13.96,
435
+ "eval_accuracy": 0.9626168224299065,
436
+ "eval_f1": 0.9697054698457223,
437
+ "eval_loss": 0.12292856723070145,
438
+ "eval_precision": 0.968,
439
+ "eval_recall": 0.9757575757575758,
440
+ "eval_runtime": 39.3533,
441
+ "eval_samples_per_second": 2.719,
442
+ "eval_steps_per_second": 0.356,
443
+ "step": 185
444
  },
445
  {
446
  "epoch": 14.94,
447
+ "step": 198,
448
  "train_accuracy": 0.9716981132075472,
449
+ "train_f1": 0.9707363445629333,
450
+ "train_loss": 0.15636524558067322,
451
+ "train_precision": 0.9719248605013513,
452
+ "train_recall": 0.9701030873944789,
453
+ "train_runtime": 128.9206,
454
+ "train_samples_per_second": 3.289,
455
+ "train_steps_per_second": 0.411
456
  },
457
  {
458
  "epoch": 14.94,
459
+ "grad_norm": 2.6144134998321533,
460
+ "learning_rate": 0.00018850855745721268,
461
+ "loss": 0.367,
462
+ "step": 198
463
  },
464
  {
465
  "epoch": 14.94,
466
  "eval_accuracy": 0.9906542056074766,
467
  "eval_f1": 0.9922719141323793,
468
+ "eval_loss": 0.1049351617693901,
469
  "eval_precision": 0.990909090909091,
470
  "eval_recall": 0.9939393939393939,
471
+ "eval_runtime": 39.5629,
472
+ "eval_samples_per_second": 2.705,
473
+ "eval_steps_per_second": 0.354,
474
+ "step": 198
475
  },
476
  {
477
  "epoch": 16.0,
478
+ "step": 212,
479
+ "train_accuracy": 0.9858490566037735,
480
+ "train_f1": 0.9859794210341276,
481
+ "train_loss": 0.11216574162244797,
482
+ "train_precision": 0.9859128049064834,
483
+ "train_recall": 0.986104018607261,
484
+ "train_runtime": 132.2515,
485
+ "train_samples_per_second": 3.206,
486
+ "train_steps_per_second": 0.401
487
  },
488
  {
489
  "epoch": 16.0,
490
+ "grad_norm": 2.7048161029815674,
491
+ "learning_rate": 0.000178239608801956,
492
+ "loss": 0.4376,
493
+ "step": 212
494
  },
495
  {
496
  "epoch": 16.0,
497
  "eval_accuracy": 0.9906542056074766,
498
  "eval_f1": 0.9922719141323793,
499
+ "eval_loss": 0.08710027486085892,
500
  "eval_precision": 0.990909090909091,
501
  "eval_recall": 0.9939393939393939,
502
+ "eval_runtime": 42.0413,
503
+ "eval_samples_per_second": 2.545,
504
+ "eval_steps_per_second": 0.333,
505
+ "step": 212
506
  },
507
  {
508
  "epoch": 16.98,
509
+ "step": 225,
510
+ "train_accuracy": 0.9787735849056604,
511
+ "train_f1": 0.978423027691737,
512
+ "train_loss": 0.10880015045404434,
513
+ "train_precision": 0.9783562367864693,
514
+ "train_recall": 0.9791313538827833,
515
+ "train_runtime": 131.0592,
516
+ "train_samples_per_second": 3.235,
517
+ "train_steps_per_second": 0.404
518
  },
519
  {
520
  "epoch": 16.98,
521
+ "grad_norm": 2.9348771572113037,
522
+ "learning_rate": 0.0001687041564792176,
523
+ "loss": 0.3638,
524
+ "step": 225
525
  },
526
  {
527
  "epoch": 16.98,
528
  "eval_accuracy": 0.9906542056074766,
529
  "eval_f1": 0.9922719141323793,
530
+ "eval_loss": 0.07979033887386322,
531
  "eval_precision": 0.990909090909091,
532
  "eval_recall": 0.9939393939393939,
533
+ "eval_runtime": 38.9013,
534
+ "eval_samples_per_second": 2.751,
535
+ "eval_steps_per_second": 0.36,
536
+ "step": 225
537
  },
538
  {
539
  "epoch": 17.96,
540
+ "step": 238,
541
+ "train_accuracy": 0.9811320754716981,
542
+ "train_f1": 0.9813568397733909,
543
+ "train_loss": 0.12247739732265472,
544
+ "train_precision": 0.9824647159390165,
545
+ "train_recall": 0.9805006998510924,
546
+ "train_runtime": 135.9878,
547
+ "train_samples_per_second": 3.118,
548
+ "train_steps_per_second": 0.39
549
  },
550
  {
551
  "epoch": 17.96,
552
+ "grad_norm": 2.9127988815307617,
553
+ "learning_rate": 0.0001591687041564792,
554
+ "loss": 0.3758,
555
+ "step": 238
556
  },
557
  {
558
  "epoch": 17.96,
559
  "eval_accuracy": 0.9906542056074766,
560
  "eval_f1": 0.9922719141323793,
561
+ "eval_loss": 0.05758798122406006,
562
  "eval_precision": 0.990909090909091,
563
  "eval_recall": 0.9939393939393939,
564
+ "eval_runtime": 40.1837,
565
+ "eval_samples_per_second": 2.663,
566
+ "eval_steps_per_second": 0.348,
567
+ "step": 238
568
  },
569
  {
570
  "epoch": 18.94,
571
+ "step": 251,
572
+ "train_accuracy": 0.9858490566037735,
573
+ "train_f1": 0.9849505768779323,
574
+ "train_loss": 0.08141080290079117,
575
+ "train_precision": 0.9856198097123687,
576
+ "train_recall": 0.9845945870999945,
577
+ "train_runtime": 131.3411,
578
+ "train_samples_per_second": 3.228,
579
+ "train_steps_per_second": 0.404
580
  },
581
  {
582
  "epoch": 18.94,
583
+ "grad_norm": 2.887089252471924,
584
+ "learning_rate": 0.00014963325183374083,
585
+ "loss": 0.2759,
586
+ "step": 251
587
  },
588
  {
589
  "epoch": 18.94,
590
  "eval_accuracy": 0.9906542056074766,
591
  "eval_f1": 0.9922719141323793,
592
+ "eval_loss": 0.06044730544090271,
593
+ "eval_precision": 0.990909090909091,
594
+ "eval_recall": 0.9939393939393939,
595
+ "eval_runtime": 56.9665,
596
+ "eval_samples_per_second": 1.878,
597
+ "eval_steps_per_second": 0.246,
598
+ "step": 251
599
+ },
600
+ {
601
+ "epoch": 20.0,
602
+ "step": 265,
603
+ "train_accuracy": 0.9834905660377359,
604
+ "train_f1": 0.9841577997732367,
605
+ "train_loss": 0.09728587418794632,
606
+ "train_precision": 0.9841397108638489,
607
+ "train_recall": 0.9842617289830912,
608
+ "train_runtime": 131.2678,
609
+ "train_samples_per_second": 3.23,
610
+ "train_steps_per_second": 0.404
611
+ },
612
+ {
613
+ "epoch": 20.0,
614
+ "grad_norm": 2.2641186714172363,
615
+ "learning_rate": 0.00013936430317848408,
616
+ "loss": 0.3212,
617
+ "step": 265
618
+ },
619
+ {
620
+ "epoch": 20.0,
621
+ "eval_accuracy": 0.9813084112149533,
622
+ "eval_f1": 0.984659090909091,
623
+ "eval_loss": 0.09081904590129852,
624
+ "eval_precision": 0.9826086956521738,
625
+ "eval_recall": 0.9878787878787879,
626
+ "eval_runtime": 39.7053,
627
+ "eval_samples_per_second": 2.695,
628
+ "eval_steps_per_second": 0.353,
629
+ "step": 265
630
+ },
631
+ {
632
+ "epoch": 20.98,
633
+ "step": 278,
634
+ "train_accuracy": 0.9740566037735849,
635
+ "train_f1": 0.9730482239763667,
636
+ "train_loss": 0.12885905802249908,
637
+ "train_precision": 0.9755676794807229,
638
+ "train_recall": 0.9715263957551142,
639
+ "train_runtime": 129.4684,
640
+ "train_samples_per_second": 3.275,
641
+ "train_steps_per_second": 0.409
642
+ },
643
+ {
644
+ "epoch": 20.98,
645
+ "grad_norm": 3.7218384742736816,
646
+ "learning_rate": 0.00012982885085574571,
647
+ "loss": 0.3215,
648
+ "step": 278
649
+ },
650
+ {
651
+ "epoch": 20.98,
652
+ "eval_accuracy": 0.9906542056074766,
653
+ "eval_f1": 0.9922719141323793,
654
+ "eval_loss": 0.08540945500135422,
655
  "eval_precision": 0.990909090909091,
656
  "eval_recall": 0.9939393939393939,
657
+ "eval_runtime": 39.1648,
658
+ "eval_samples_per_second": 2.732,
659
+ "eval_steps_per_second": 0.357,
660
+ "step": 278
661
+ },
662
+ {
663
+ "epoch": 21.96,
664
+ "step": 291,
665
+ "train_accuracy": 0.9882075471698113,
666
+ "train_f1": 0.987640276713749,
667
+ "train_loss": 0.08325836062431335,
668
+ "train_precision": 0.9890280836661814,
669
+ "train_recall": 0.9866258111031001,
670
+ "train_runtime": 124.9541,
671
+ "train_samples_per_second": 3.393,
672
+ "train_steps_per_second": 0.424
673
+ },
674
+ {
675
+ "epoch": 21.96,
676
+ "grad_norm": 5.685765743255615,
677
+ "learning_rate": 0.00012029339853300733,
678
+ "loss": 0.3545,
679
+ "step": 291
680
+ },
681
+ {
682
+ "epoch": 21.96,
683
+ "eval_accuracy": 0.9906542056074766,
684
+ "eval_f1": 0.9922719141323793,
685
+ "eval_loss": 0.07166730612516403,
686
+ "eval_precision": 0.990909090909091,
687
+ "eval_recall": 0.9939393939393939,
688
+ "eval_runtime": 39.2775,
689
+ "eval_samples_per_second": 2.724,
690
+ "eval_steps_per_second": 0.356,
691
+ "step": 291
692
+ },
693
+ {
694
+ "epoch": 22.94,
695
+ "step": 304,
696
+ "train_accuracy": 0.9834905660377359,
697
+ "train_f1": 0.9840178843462146,
698
+ "train_loss": 0.09864702820777893,
699
+ "train_precision": 0.9840027079631041,
700
+ "train_recall": 0.9844752990764272,
701
+ "train_runtime": 132.2941,
702
+ "train_samples_per_second": 3.205,
703
+ "train_steps_per_second": 0.401
704
+ },
705
+ {
706
+ "epoch": 22.94,
707
+ "grad_norm": 4.6724138259887695,
708
+ "learning_rate": 0.00011075794621026893,
709
+ "loss": 0.3085,
710
+ "step": 304
711
+ },
712
+ {
713
+ "epoch": 22.94,
714
+ "eval_accuracy": 0.9906542056074766,
715
+ "eval_f1": 0.9922719141323793,
716
+ "eval_loss": 0.08209435641765594,
717
+ "eval_precision": 0.990909090909091,
718
+ "eval_recall": 0.9939393939393939,
719
+ "eval_runtime": 39.8399,
720
+ "eval_samples_per_second": 2.686,
721
+ "eval_steps_per_second": 0.351,
722
+ "step": 304
723
+ },
724
+ {
725
+ "epoch": 24.0,
726
+ "step": 318,
727
+ "train_accuracy": 0.9858490566037735,
728
+ "train_f1": 0.9856773749159446,
729
+ "train_loss": 0.08332642912864685,
730
+ "train_precision": 0.9871632432676922,
731
+ "train_recall": 0.9844707203986204,
732
+ "train_runtime": 130.1118,
733
+ "train_samples_per_second": 3.259,
734
+ "train_steps_per_second": 0.407
735
+ },
736
+ {
737
+ "epoch": 24.0,
738
+ "grad_norm": 4.654536724090576,
739
+ "learning_rate": 0.00010048899755501222,
740
+ "loss": 0.2637,
741
+ "step": 318
742
+ },
743
+ {
744
+ "epoch": 24.0,
745
+ "eval_accuracy": 0.9906542056074766,
746
+ "eval_f1": 0.9922719141323793,
747
+ "eval_loss": 0.0654672160744667,
748
+ "eval_precision": 0.990909090909091,
749
+ "eval_recall": 0.9939393939393939,
750
+ "eval_runtime": 39.6356,
751
+ "eval_samples_per_second": 2.7,
752
+ "eval_steps_per_second": 0.353,
753
+ "step": 318
754
+ },
755
+ {
756
+ "epoch": 24.98,
757
+ "step": 331,
758
+ "train_accuracy": 0.9858490566037735,
759
+ "train_f1": 0.986589461820231,
760
+ "train_loss": 0.07109413295984268,
761
+ "train_precision": 0.9871650821089023,
762
+ "train_recall": 0.98621216568729,
763
+ "train_runtime": 128.7284,
764
+ "train_samples_per_second": 3.294,
765
+ "train_steps_per_second": 0.412
766
+ },
767
+ {
768
+ "epoch": 24.98,
769
+ "grad_norm": 4.322335243225098,
770
+ "learning_rate": 9.095354523227383e-05,
771
+ "loss": 0.2723,
772
+ "step": 331
773
+ },
774
+ {
775
+ "epoch": 24.98,
776
+ "eval_accuracy": 0.9906542056074766,
777
+ "eval_f1": 0.9922719141323793,
778
+ "eval_loss": 0.06855478882789612,
779
+ "eval_precision": 0.990909090909091,
780
+ "eval_recall": 0.9939393939393939,
781
+ "eval_runtime": 38.4718,
782
+ "eval_samples_per_second": 2.781,
783
+ "eval_steps_per_second": 0.364,
784
+ "step": 331
785
+ },
786
+ {
787
+ "epoch": 25.96,
788
+ "step": 344,
789
+ "train_accuracy": 0.9764150943396226,
790
+ "train_f1": 0.9756975326292678,
791
+ "train_loss": 0.09106432646512985,
792
+ "train_precision": 0.9760480054398313,
793
+ "train_recall": 0.9753730488175062,
794
+ "train_runtime": 132.4151,
795
+ "train_samples_per_second": 3.202,
796
+ "train_steps_per_second": 0.4
797
+ },
798
+ {
799
+ "epoch": 25.96,
800
+ "grad_norm": 4.0115766525268555,
801
+ "learning_rate": 8.141809290953544e-05,
802
+ "loss": 0.36,
803
+ "step": 344
804
+ },
805
+ {
806
+ "epoch": 25.96,
807
+ "eval_accuracy": 0.9906542056074766,
808
+ "eval_f1": 0.9922719141323793,
809
+ "eval_loss": 0.0726209431886673,
810
+ "eval_precision": 0.990909090909091,
811
+ "eval_recall": 0.9939393939393939,
812
+ "eval_runtime": 40.7509,
813
+ "eval_samples_per_second": 2.626,
814
+ "eval_steps_per_second": 0.344,
815
+ "step": 344
816
+ },
817
+ {
818
+ "epoch": 26.94,
819
+ "step": 357,
820
+ "train_accuracy": 0.9740566037735849,
821
+ "train_f1": 0.9741101148833959,
822
+ "train_loss": 0.08721727132797241,
823
+ "train_precision": 0.9743783993783992,
824
+ "train_recall": 0.9741037543564772,
825
+ "train_runtime": 133.2307,
826
+ "train_samples_per_second": 3.182,
827
+ "train_steps_per_second": 0.398
828
+ },
829
+ {
830
+ "epoch": 26.94,
831
+ "grad_norm": 2.0709145069122314,
832
+ "learning_rate": 7.188264058679705e-05,
833
+ "loss": 0.2535,
834
+ "step": 357
835
+ },
836
+ {
837
+ "epoch": 26.94,
838
+ "eval_accuracy": 0.9906542056074766,
839
+ "eval_f1": 0.9922719141323793,
840
+ "eval_loss": 0.06701695173978806,
841
+ "eval_precision": 0.990909090909091,
842
+ "eval_recall": 0.9939393939393939,
843
+ "eval_runtime": 39.1128,
844
+ "eval_samples_per_second": 2.736,
845
+ "eval_steps_per_second": 0.358,
846
+ "step": 357
847
+ },
848
+ {
849
+ "epoch": 28.0,
850
+ "step": 371,
851
+ "train_accuracy": 0.9811320754716981,
852
+ "train_f1": 0.9811807715855693,
853
+ "train_loss": 0.08411888033151627,
854
+ "train_precision": 0.9816060153438795,
855
+ "train_recall": 0.9810544308058603,
856
+ "train_runtime": 138.1723,
857
+ "train_samples_per_second": 3.069,
858
+ "train_steps_per_second": 0.384
859
+ },
860
+ {
861
+ "epoch": 28.0,
862
+ "grad_norm": 2.612853527069092,
863
+ "learning_rate": 6.161369193154034e-05,
864
+ "loss": 0.2551,
865
+ "step": 371
866
+ },
867
+ {
868
+ "epoch": 28.0,
869
+ "eval_accuracy": 0.9906542056074766,
870
+ "eval_f1": 0.9922719141323793,
871
+ "eval_loss": 0.0589648000895977,
872
+ "eval_precision": 0.990909090909091,
873
+ "eval_recall": 0.9939393939393939,
874
+ "eval_runtime": 39.9978,
875
+ "eval_samples_per_second": 2.675,
876
+ "eval_steps_per_second": 0.35,
877
+ "step": 371
878
+ },
879
+ {
880
+ "epoch": 28.98,
881
+ "step": 384,
882
+ "train_accuracy": 0.9764150943396226,
883
+ "train_f1": 0.9760814191422504,
884
+ "train_loss": 0.08149362355470657,
885
+ "train_precision": 0.9773233573176615,
886
+ "train_recall": 0.9755777028177874,
887
+ "train_runtime": 158.8587,
888
+ "train_samples_per_second": 2.669,
889
+ "train_steps_per_second": 0.334
890
+ },
891
+ {
892
+ "epoch": 28.98,
893
+ "grad_norm": 3.303999662399292,
894
+ "learning_rate": 5.207823960880195e-05,
895
+ "loss": 0.3202,
896
+ "step": 384
897
+ },
898
+ {
899
+ "epoch": 28.98,
900
+ "eval_accuracy": 0.9906542056074766,
901
+ "eval_f1": 0.9922719141323793,
902
+ "eval_loss": 0.05450604483485222,
903
+ "eval_precision": 0.990909090909091,
904
+ "eval_recall": 0.9939393939393939,
905
+ "eval_runtime": 40.1368,
906
+ "eval_samples_per_second": 2.666,
907
+ "eval_steps_per_second": 0.349,
908
+ "step": 384
909
+ },
910
+ {
911
+ "epoch": 29.96,
912
+ "step": 397,
913
+ "train_accuracy": 0.9976415094339622,
914
+ "train_f1": 0.9975194273245798,
915
+ "train_loss": 0.05407993122935295,
916
+ "train_precision": 0.9977011494252874,
917
+ "train_recall": 0.9973684210526315,
918
+ "train_runtime": 137.913,
919
+ "train_samples_per_second": 3.074,
920
+ "train_steps_per_second": 0.384
921
+ },
922
+ {
923
+ "epoch": 29.96,
924
+ "grad_norm": 2.247615098953247,
925
+ "learning_rate": 4.2542787286063565e-05,
926
+ "loss": 0.2714,
927
+ "step": 397
928
+ },
929
+ {
930
+ "epoch": 29.96,
931
+ "eval_accuracy": 0.9906542056074766,
932
+ "eval_f1": 0.9922719141323793,
933
+ "eval_loss": 0.04375358670949936,
934
+ "eval_precision": 0.990909090909091,
935
+ "eval_recall": 0.9939393939393939,
936
+ "eval_runtime": 40.7987,
937
+ "eval_samples_per_second": 2.623,
938
+ "eval_steps_per_second": 0.343,
939
+ "step": 397
940
+ },
941
+ {
942
+ "epoch": 30.94,
943
+ "step": 410,
944
+ "train_accuracy": 0.9834905660377359,
945
+ "train_f1": 0.9834196702554923,
946
+ "train_loss": 0.06718786805868149,
947
+ "train_precision": 0.9847436600428245,
948
+ "train_recall": 0.9826715101769175,
949
+ "train_runtime": 130.6854,
950
+ "train_samples_per_second": 3.244,
951
+ "train_steps_per_second": 0.406
952
+ },
953
+ {
954
+ "epoch": 30.94,
955
+ "grad_norm": 4.1858415603637695,
956
+ "learning_rate": 3.300733496332518e-05,
957
+ "loss": 0.2362,
958
+ "step": 410
959
+ },
960
+ {
961
+ "epoch": 30.94,
962
+ "eval_accuracy": 0.9906542056074766,
963
+ "eval_f1": 0.9922719141323793,
964
+ "eval_loss": 0.05211889371275902,
965
+ "eval_precision": 0.990909090909091,
966
+ "eval_recall": 0.9939393939393939,
967
+ "eval_runtime": 39.7771,
968
+ "eval_samples_per_second": 2.69,
969
+ "eval_steps_per_second": 0.352,
970
+ "step": 410
971
+ },
972
+ {
973
+ "epoch": 32.0,
974
+ "step": 424,
975
+ "train_accuracy": 0.9834905660377359,
976
+ "train_f1": 0.9831986473322998,
977
+ "train_loss": 0.09282960742712021,
978
+ "train_precision": 0.9842538190364276,
979
+ "train_recall": 0.9827365778544793,
980
+ "train_runtime": 128.6901,
981
+ "train_samples_per_second": 3.295,
982
+ "train_steps_per_second": 0.412
983
+ },
984
+ {
985
+ "epoch": 32.0,
986
+ "grad_norm": 3.333651542663574,
987
+ "learning_rate": 2.273838630806846e-05,
988
+ "loss": 0.2693,
989
+ "step": 424
990
+ },
991
+ {
992
+ "epoch": 32.0,
993
+ "eval_accuracy": 0.9906542056074766,
994
+ "eval_f1": 0.9922719141323793,
995
+ "eval_loss": 0.03920552134513855,
996
+ "eval_precision": 0.990909090909091,
997
+ "eval_recall": 0.9939393939393939,
998
+ "eval_runtime": 39.2034,
999
+ "eval_samples_per_second": 2.729,
1000
+ "eval_steps_per_second": 0.357,
1001
+ "step": 424
1002
+ },
1003
+ {
1004
+ "epoch": 32.98,
1005
+ "step": 437,
1006
+ "train_accuracy": 0.9834905660377359,
1007
+ "train_f1": 0.9830305559415786,
1008
+ "train_loss": 0.06464195251464844,
1009
+ "train_precision": 0.9849462365591398,
1010
+ "train_recall": 0.9821793023126837,
1011
+ "train_runtime": 132.3464,
1012
+ "train_samples_per_second": 3.204,
1013
+ "train_steps_per_second": 0.4
1014
+ },
1015
+ {
1016
+ "epoch": 32.98,
1017
+ "grad_norm": 4.870348930358887,
1018
+ "learning_rate": 1.3202933985330072e-05,
1019
+ "loss": 0.2644,
1020
+ "step": 437
1021
+ },
1022
+ {
1023
+ "epoch": 32.98,
1024
+ "eval_accuracy": 0.9906542056074766,
1025
+ "eval_f1": 0.9922719141323793,
1026
+ "eval_loss": 0.06383071094751358,
1027
+ "eval_precision": 0.990909090909091,
1028
+ "eval_recall": 0.9939393939393939,
1029
+ "eval_runtime": 39.8229,
1030
+ "eval_samples_per_second": 2.687,
1031
+ "eval_steps_per_second": 0.352,
1032
+ "step": 437
1033
+ },
1034
+ {
1035
+ "epoch": 33.96,
1036
+ "step": 450,
1037
+ "train_accuracy": 0.9834905660377359,
1038
+ "train_f1": 0.983306080394328,
1039
+ "train_loss": 0.09585532546043396,
1040
+ "train_precision": 0.9838161838161839,
1041
+ "train_recall": 0.983380012201209,
1042
+ "train_runtime": 133.1315,
1043
+ "train_samples_per_second": 3.185,
1044
+ "train_steps_per_second": 0.398
1045
+ },
1046
+ {
1047
+ "epoch": 33.96,
1048
+ "grad_norm": 3.289733409881592,
1049
+ "learning_rate": 3.667481662591687e-06,
1050
+ "loss": 0.2516,
1051
+ "step": 450
1052
+ },
1053
+ {
1054
+ "epoch": 33.96,
1055
+ "eval_accuracy": 0.9906542056074766,
1056
+ "eval_f1": 0.9922719141323793,
1057
+ "eval_loss": 0.04781457036733627,
1058
+ "eval_precision": 0.990909090909091,
1059
+ "eval_recall": 0.9939393939393939,
1060
+ "eval_runtime": 40.0245,
1061
+ "eval_samples_per_second": 2.673,
1062
+ "eval_steps_per_second": 0.35,
1063
+ "step": 450
1064
+ },
1065
+ {
1066
+ "epoch": 34.34,
1067
+ "step": 455,
1068
+ "train_accuracy": 0.9740566037735849,
1069
+ "train_f1": 0.9731839886499556,
1070
+ "train_loss": 0.0982045829296112,
1071
+ "train_precision": 0.9733410852713178,
1072
+ "train_recall": 0.9733575444357457,
1073
+ "train_runtime": 130.0436,
1074
+ "train_samples_per_second": 3.26,
1075
+ "train_steps_per_second": 0.408
1076
+ },
1077
+ {
1078
+ "epoch": 34.34,
1079
+ "grad_norm": 2.205134630203247,
1080
  "learning_rate": 0.0,
1081
+ "loss": 0.2652,
1082
+ "step": 455
1083
  },
1084
  {
1085
+ "epoch": 34.34,
1086
  "eval_accuracy": 0.9906542056074766,
1087
  "eval_f1": 0.9922719141323793,
1088
+ "eval_loss": 0.0579226091504097,
1089
  "eval_precision": 0.990909090909091,
1090
  "eval_recall": 0.9939393939393939,
1091
+ "eval_runtime": 39.7063,
1092
+ "eval_samples_per_second": 2.695,
1093
+ "eval_steps_per_second": 0.353,
1094
+ "step": 455
1095
+ },
1096
+ {
1097
+ "epoch": 34.34,
1098
+ "step": 455,
1099
+ "total_flos": 3.0926830773436416e+17,
1100
+ "train_loss": 0.571298942723117,
1101
+ "train_runtime": 12111.8963,
1102
+ "train_samples_per_second": 1.225,
1103
+ "train_steps_per_second": 0.038
1104
+ },
1105
+ {
1106
+ "epoch": 34.34,
1107
+ "step": 455,
1108
+ "total_flos": 3.0926830773436416e+17,
1109
+ "train_loss": 0.0,
1110
+ "train_runtime": 0.0155,
1111
+ "train_samples_per_second": 955313.811,
1112
+ "train_steps_per_second": 29290.282
1113
  }
1114
  ],
1115
  "logging_steps": 50,
1116
+ "max_steps": 455,
1117
  "num_input_tokens_seen": 0,
1118
+ "num_train_epochs": 35,
1119
  "save_steps": 500,
1120
+ "total_flos": 3.0926830773436416e+17,
1121
  "train_batch_size": 8,
1122
  "trial_name": null,
1123
  "trial_params": null