paul commited on
Commit
351d245
1 Parent(s): 1e9495f

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 19.97,
3
- "total_flos": 8.788316705651589e+17,
4
- "train_loss": 1.1949474394321442,
5
- "train_runtime": 951.8189,
6
- "train_samples_per_second": 43.496,
7
- "train_steps_per_second": 0.168
8
  }
 
1
  {
2
+ "epoch": 79.97,
3
+ "total_flos": 3.5167284631649157e+18,
4
+ "train_loss": 0.37728101573884487,
5
+ "train_runtime": 2345.4183,
6
+ "train_samples_per_second": 70.606,
7
+ "train_steps_per_second": 0.273
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16e9983b0b302f85820432462d0332e50a532d81fb956582ac981f6875ff753f
3
  size 94391373
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:797ad435f3e4fc6eb51fbf8fb1645d146d75605941de95af5da769f7ccd82fe7
3
  size 94391373
runs/Feb02_19-21-12_teesta/events.out.tfevents.1675345885.teesta.1377.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5344b75f15da8acf194e4fd348bd3c2ba215de040e148f92a99484fe4f893555
3
- size 44308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d861004a79be1f7adec464b5a4348942865c40ac7afa2a63cfaeb1a24999eefa
3
+ size 51896
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 19.97,
3
- "total_flos": 8.788316705651589e+17,
4
- "train_loss": 1.1949474394321442,
5
- "train_runtime": 951.8189,
6
- "train_samples_per_second": 43.496,
7
- "train_steps_per_second": 0.168
8
  }
 
1
  {
2
+ "epoch": 79.97,
3
+ "total_flos": 3.5167284631649157e+18,
4
+ "train_loss": 0.37728101573884487,
5
+ "train_runtime": 2345.4183,
6
+ "train_samples_per_second": 70.606,
7
+ "train_steps_per_second": 0.273
8
  }
trainer_state.json CHANGED
@@ -1,361 +1,1369 @@
1
  {
2
- "best_metric": 0.6697247706422018,
3
- "best_model_checkpoint": "microsoft-resnet-50-cartoon-emotion-detection/checkpoint-160",
4
- "epoch": 19.96969696969697,
5
- "global_step": 160,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.97,
12
- "eval_accuracy": 0.24770642201834864,
13
- "eval_f1": 0.2041566786731761,
14
- "eval_loss": 1.3832563161849976,
15
- "eval_precision": 0.20537582488330916,
16
- "eval_recall": 0.24770642201834864,
17
- "eval_runtime": 6.8665,
18
- "eval_samples_per_second": 15.874,
19
- "eval_steps_per_second": 0.291,
20
  "step": 8
21
  },
22
  {
23
  "epoch": 1.24,
24
- "learning_rate": 7.500000000000001e-05,
25
- "loss": 1.4276,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 1.97,
30
- "eval_accuracy": 0.30275229357798167,
31
- "eval_f1": 0.19323441779945572,
32
- "eval_loss": 1.3710838556289673,
33
- "eval_precision": 0.19816345733776927,
34
- "eval_recall": 0.30275229357798167,
35
- "eval_runtime": 4.187,
36
- "eval_samples_per_second": 26.033,
37
- "eval_steps_per_second": 0.478,
38
  "step": 16
39
  },
40
  {
41
  "epoch": 2.48,
42
- "learning_rate": 0.00011666666666666667,
43
- "loss": 1.4046,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 2.97,
48
- "eval_accuracy": 0.30275229357798167,
49
- "eval_f1": 0.14071585476159712,
50
- "eval_loss": 1.3549774885177612,
51
- "eval_precision": 0.09165895126672839,
52
- "eval_recall": 0.30275229357798167,
53
- "eval_runtime": 4.1191,
54
- "eval_samples_per_second": 26.462,
55
- "eval_steps_per_second": 0.486,
56
  "step": 24
57
  },
58
  {
59
  "epoch": 3.73,
60
- "learning_rate": 0.00010833333333333334,
61
- "loss": 1.3817,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 3.97,
66
- "eval_accuracy": 0.3119266055045872,
67
- "eval_f1": 0.15922843504338724,
68
- "eval_loss": 1.3374683856964111,
69
- "eval_precision": 0.2851681957186545,
70
- "eval_recall": 0.3119266055045872,
71
- "eval_runtime": 4.1507,
72
- "eval_samples_per_second": 26.26,
73
- "eval_steps_per_second": 0.482,
74
  "step": 32
75
  },
76
  {
77
  "epoch": 4.97,
78
- "learning_rate": 0.0001,
79
- "loss": 1.3562,
80
  "step": 40
81
  },
82
  {
83
  "epoch": 4.97,
84
- "eval_accuracy": 0.3211009174311927,
85
- "eval_f1": 0.17853677613274146,
86
- "eval_loss": 1.3179248571395874,
87
- "eval_precision": 0.433702613813398,
88
- "eval_recall": 0.3211009174311927,
89
- "eval_runtime": 3.9993,
90
- "eval_samples_per_second": 27.255,
91
- "eval_steps_per_second": 0.5,
92
  "step": 40
93
  },
94
  {
95
  "epoch": 5.97,
96
- "eval_accuracy": 0.3761467889908257,
97
- "eval_f1": 0.27407584962221454,
98
- "eval_loss": 1.2991451025009155,
99
- "eval_precision": 0.5442070773263434,
100
- "eval_recall": 0.3761467889908257,
101
- "eval_runtime": 4.0689,
102
- "eval_samples_per_second": 26.789,
103
- "eval_steps_per_second": 0.492,
104
  "step": 48
105
  },
106
  {
107
  "epoch": 6.24,
108
- "learning_rate": 9.166666666666667e-05,
109
- "loss": 1.3624,
110
  "step": 50
111
  },
112
  {
113
  "epoch": 6.97,
114
- "eval_accuracy": 0.44954128440366975,
115
- "eval_f1": 0.36590235512607705,
116
- "eval_loss": 1.2751092910766602,
117
- "eval_precision": 0.5593305776792015,
118
- "eval_recall": 0.44954128440366975,
119
- "eval_runtime": 4.2081,
120
- "eval_samples_per_second": 25.902,
121
- "eval_steps_per_second": 0.475,
122
  "step": 56
123
  },
124
  {
125
  "epoch": 7.48,
126
- "learning_rate": 8.333333333333333e-05,
127
- "loss": 1.2914,
128
  "step": 60
129
  },
130
  {
131
  "epoch": 7.97,
132
- "eval_accuracy": 0.47706422018348627,
133
- "eval_f1": 0.4093504976516682,
134
- "eval_loss": 1.2494499683380127,
135
- "eval_precision": 0.5442467116171107,
136
- "eval_recall": 0.47706422018348627,
137
- "eval_runtime": 5.2925,
138
- "eval_samples_per_second": 20.595,
139
- "eval_steps_per_second": 0.378,
140
  "step": 64
141
  },
142
  {
143
  "epoch": 8.73,
144
- "learning_rate": 7.500000000000001e-05,
145
- "loss": 1.2518,
146
  "step": 70
147
  },
148
  {
149
  "epoch": 8.97,
150
- "eval_accuracy": 0.5045871559633027,
151
- "eval_f1": 0.4430477860209126,
152
- "eval_loss": 1.2278722524642944,
153
- "eval_precision": 0.5524980055843638,
154
- "eval_recall": 0.5045871559633027,
155
- "eval_runtime": 4.1494,
156
- "eval_samples_per_second": 26.269,
157
- "eval_steps_per_second": 0.482,
158
  "step": 72
159
  },
160
  {
161
  "epoch": 9.97,
162
- "learning_rate": 6.666666666666667e-05,
163
- "loss": 1.2085,
164
  "step": 80
165
  },
166
  {
167
  "epoch": 9.97,
168
- "eval_accuracy": 0.5321100917431193,
169
- "eval_f1": 0.457935571494903,
170
- "eval_loss": 1.1905453205108643,
171
- "eval_precision": 0.5134095764735453,
172
- "eval_recall": 0.5321100917431193,
173
- "eval_runtime": 4.0094,
174
- "eval_samples_per_second": 27.186,
175
- "eval_steps_per_second": 0.499,
176
  "step": 80
177
  },
178
  {
179
  "epoch": 10.97,
180
- "eval_accuracy": 0.5504587155963303,
181
- "eval_f1": 0.48717462017050756,
182
- "eval_loss": 1.1602399349212646,
183
- "eval_precision": 0.515116763969975,
184
- "eval_recall": 0.5504587155963303,
185
- "eval_runtime": 4.1843,
186
- "eval_samples_per_second": 26.05,
187
- "eval_steps_per_second": 0.478,
188
  "step": 88
189
  },
190
  {
191
  "epoch": 11.24,
192
- "learning_rate": 5.833333333333333e-05,
193
- "loss": 1.1865,
194
  "step": 90
195
  },
196
  {
197
  "epoch": 11.97,
198
- "eval_accuracy": 0.5963302752293578,
199
- "eval_f1": 0.5416442261263603,
200
- "eval_loss": 1.130654215812683,
201
- "eval_precision": 0.5968683580403025,
202
- "eval_recall": 0.5963302752293578,
203
- "eval_runtime": 4.1467,
204
- "eval_samples_per_second": 26.286,
205
- "eval_steps_per_second": 0.482,
206
  "step": 96
207
  },
208
  {
209
  "epoch": 12.48,
210
- "learning_rate": 5e-05,
211
- "loss": 1.122,
212
  "step": 100
213
  },
214
  {
215
  "epoch": 12.97,
216
- "eval_accuracy": 0.5871559633027523,
217
- "eval_f1": 0.5205747436589314,
218
- "eval_loss": 1.1036800146102905,
219
- "eval_precision": 0.5069059380985986,
220
- "eval_recall": 0.5871559633027523,
221
- "eval_runtime": 4.1081,
222
- "eval_samples_per_second": 26.533,
223
- "eval_steps_per_second": 0.487,
224
  "step": 104
225
  },
226
  {
227
  "epoch": 13.73,
228
- "learning_rate": 4.1666666666666665e-05,
229
- "loss": 1.0812,
230
  "step": 110
231
  },
232
  {
233
  "epoch": 13.97,
234
- "eval_accuracy": 0.5688073394495413,
235
- "eval_f1": 0.506813344115706,
236
- "eval_loss": 1.0797398090362549,
237
- "eval_precision": 0.4868139278766149,
238
- "eval_recall": 0.5688073394495413,
239
- "eval_runtime": 5.1378,
240
- "eval_samples_per_second": 21.215,
241
- "eval_steps_per_second": 0.389,
242
  "step": 112
243
  },
244
  {
245
  "epoch": 14.97,
246
- "learning_rate": 3.3333333333333335e-05,
247
- "loss": 1.0449,
248
  "step": 120
249
  },
250
  {
251
  "epoch": 14.97,
252
- "eval_accuracy": 0.6238532110091743,
253
- "eval_f1": 0.5641135054405804,
254
- "eval_loss": 1.0711662769317627,
255
- "eval_precision": 0.5269170849922105,
256
- "eval_recall": 0.6238532110091743,
257
- "eval_runtime": 4.0775,
258
- "eval_samples_per_second": 26.732,
259
- "eval_steps_per_second": 0.49,
260
  "step": 120
261
  },
262
  {
263
  "epoch": 15.97,
264
- "eval_accuracy": 0.6238532110091743,
265
- "eval_f1": 0.5516990654605334,
266
- "eval_loss": 1.042523980140686,
267
- "eval_precision": 0.5122844888800522,
268
- "eval_recall": 0.6238532110091743,
269
- "eval_runtime": 4.1924,
270
- "eval_samples_per_second": 26.0,
271
- "eval_steps_per_second": 0.477,
272
  "step": 128
273
  },
274
  {
275
  "epoch": 16.24,
276
- "learning_rate": 2.5e-05,
277
- "loss": 1.0458,
278
  "step": 130
279
  },
280
  {
281
  "epoch": 16.97,
282
- "eval_accuracy": 0.6238532110091743,
283
- "eval_f1": 0.5782110340855532,
284
- "eval_loss": 1.0345710515975952,
285
- "eval_precision": 0.6487350569919378,
286
- "eval_recall": 0.6238532110091743,
287
- "eval_runtime": 4.1751,
288
- "eval_samples_per_second": 26.107,
289
- "eval_steps_per_second": 0.479,
290
  "step": 136
291
  },
292
  {
293
  "epoch": 17.48,
294
- "learning_rate": 1.6666666666666667e-05,
295
- "loss": 1.004,
296
  "step": 140
297
  },
298
  {
299
  "epoch": 17.97,
300
- "eval_accuracy": 0.6330275229357798,
301
- "eval_f1": 0.5720803372179519,
302
- "eval_loss": 1.0264408588409424,
303
- "eval_precision": 0.5471838739885577,
304
- "eval_recall": 0.6330275229357798,
305
- "eval_runtime": 4.1766,
306
- "eval_samples_per_second": 26.098,
307
- "eval_steps_per_second": 0.479,
308
  "step": 144
309
  },
310
  {
311
  "epoch": 18.73,
312
- "learning_rate": 8.333333333333334e-06,
313
- "loss": 0.9806,
314
  "step": 150
315
  },
316
  {
317
  "epoch": 18.97,
318
- "eval_accuracy": 0.6605504587155964,
319
- "eval_f1": 0.6069485442003585,
320
- "eval_loss": 1.0041249990463257,
321
- "eval_precision": 0.6334097859327217,
322
- "eval_recall": 0.6605504587155964,
323
- "eval_runtime": 4.1778,
324
- "eval_samples_per_second": 26.09,
325
- "eval_steps_per_second": 0.479,
326
  "step": 152
327
  },
328
  {
329
  "epoch": 19.97,
330
- "learning_rate": 0.0,
331
- "loss": 0.97,
332
  "step": 160
333
  },
334
  {
335
  "epoch": 19.97,
336
- "eval_accuracy": 0.6697247706422018,
337
- "eval_f1": 0.6086361803243947,
338
- "eval_loss": 1.0058709383010864,
339
- "eval_precision": 0.5798801171844885,
340
- "eval_recall": 0.6697247706422018,
341
- "eval_runtime": 4.221,
342
- "eval_samples_per_second": 25.824,
343
- "eval_steps_per_second": 0.474,
344
  "step": 160
345
  },
346
  {
347
- "epoch": 19.97,
348
- "step": 160,
349
- "total_flos": 8.788316705651589e+17,
350
- "train_loss": 1.1949474394321442,
351
- "train_runtime": 951.8189,
352
- "train_samples_per_second": 43.496,
353
- "train_steps_per_second": 0.168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  }
355
  ],
356
- "max_steps": 160,
357
- "num_train_epochs": 20,
358
- "total_flos": 8.788316705651589e+17,
359
  "trial_name": null,
360
  "trial_params": null
361
  }
 
1
  {
2
+ "best_metric": 0.8440366972477065,
3
+ "best_model_checkpoint": "microsoft-resnet-50-cartoon-emotion-detection/checkpoint-528",
4
+ "epoch": 79.96969696969697,
5
+ "global_step": 640,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.97,
12
+ "eval_accuracy": 0.22935779816513763,
13
+ "eval_f1": 0.21647331951741566,
14
+ "eval_loss": 1.3854628801345825,
15
+ "eval_precision": 0.2697355008315897,
16
+ "eval_recall": 0.22935779816513763,
17
+ "eval_runtime": 6.0845,
18
+ "eval_samples_per_second": 17.914,
19
+ "eval_steps_per_second": 0.329,
20
  "step": 8
21
  },
22
  {
23
  "epoch": 1.24,
24
+ "learning_rate": 1.8750000000000002e-05,
25
+ "loss": 1.4222,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 1.97,
30
+ "eval_accuracy": 0.25688073394495414,
31
+ "eval_f1": 0.25434987070599063,
32
+ "eval_loss": 1.3791918754577637,
33
+ "eval_precision": 0.2807708571335123,
34
+ "eval_recall": 0.25688073394495414,
35
+ "eval_runtime": 4.2925,
36
+ "eval_samples_per_second": 25.393,
37
+ "eval_steps_per_second": 0.466,
38
  "step": 16
39
  },
40
  {
41
  "epoch": 2.48,
42
+ "learning_rate": 3.7500000000000003e-05,
43
+ "loss": 1.4183,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 2.97,
48
+ "eval_accuracy": 0.3853211009174312,
49
+ "eval_f1": 0.3511144563688655,
50
+ "eval_loss": 1.3646042346954346,
51
+ "eval_precision": 0.4102155235182758,
52
+ "eval_recall": 0.3853211009174312,
53
+ "eval_runtime": 4.3579,
54
+ "eval_samples_per_second": 25.012,
55
+ "eval_steps_per_second": 0.459,
56
  "step": 24
57
  },
58
  {
59
  "epoch": 3.73,
60
+ "learning_rate": 5.625e-05,
61
+ "loss": 1.4097,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 3.97,
66
+ "eval_accuracy": 0.41284403669724773,
67
+ "eval_f1": 0.32452248836490605,
68
+ "eval_loss": 1.3562716245651245,
69
+ "eval_precision": 0.5062447369587377,
70
+ "eval_recall": 0.41284403669724773,
71
+ "eval_runtime": 4.3612,
72
+ "eval_samples_per_second": 24.993,
73
+ "eval_steps_per_second": 0.459,
74
  "step": 32
75
  },
76
  {
77
  "epoch": 4.97,
78
+ "learning_rate": 7.500000000000001e-05,
79
+ "loss": 1.3944,
80
  "step": 40
81
  },
82
  {
83
  "epoch": 4.97,
84
+ "eval_accuracy": 0.4036697247706422,
85
+ "eval_f1": 0.293905028882093,
86
+ "eval_loss": 1.3461558818817139,
87
+ "eval_precision": 0.3927443217900042,
88
+ "eval_recall": 0.4036697247706422,
89
+ "eval_runtime": 4.4159,
90
+ "eval_samples_per_second": 24.684,
91
+ "eval_steps_per_second": 0.453,
92
  "step": 40
93
  },
94
  {
95
  "epoch": 5.97,
96
+ "eval_accuracy": 0.4036697247706422,
97
+ "eval_f1": 0.2840541721275666,
98
+ "eval_loss": 1.322252631187439,
99
+ "eval_precision": 0.5152005756431013,
100
+ "eval_recall": 0.4036697247706422,
101
+ "eval_runtime": 4.2725,
102
+ "eval_samples_per_second": 25.512,
103
+ "eval_steps_per_second": 0.468,
104
  "step": 48
105
  },
106
  {
107
  "epoch": 6.24,
108
+ "learning_rate": 9.375e-05,
109
+ "loss": 1.411,
110
  "step": 50
111
  },
112
  {
113
  "epoch": 6.97,
114
+ "eval_accuracy": 0.41284403669724773,
115
+ "eval_f1": 0.29851761513413444,
116
+ "eval_loss": 1.304025411605835,
117
+ "eval_precision": 0.44039013993142434,
118
+ "eval_recall": 0.41284403669724773,
119
+ "eval_runtime": 4.4737,
120
+ "eval_samples_per_second": 24.365,
121
+ "eval_steps_per_second": 0.447,
122
  "step": 56
123
  },
124
  {
125
  "epoch": 7.48,
126
+ "learning_rate": 0.0001125,
127
+ "loss": 1.346,
128
  "step": 60
129
  },
130
  {
131
  "epoch": 7.97,
132
+ "eval_accuracy": 0.4954128440366973,
133
+ "eval_f1": 0.4092687533729088,
134
+ "eval_loss": 1.2699785232543945,
135
+ "eval_precision": 0.49603902723168775,
136
+ "eval_recall": 0.4954128440366973,
137
+ "eval_runtime": 4.5079,
138
+ "eval_samples_per_second": 24.18,
139
+ "eval_steps_per_second": 0.444,
140
  "step": 64
141
  },
142
  {
143
  "epoch": 8.73,
144
+ "learning_rate": 0.00011875000000000001,
145
+ "loss": 1.3031,
146
  "step": 70
147
  },
148
  {
149
  "epoch": 8.97,
150
+ "eval_accuracy": 0.5596330275229358,
151
+ "eval_f1": 0.46723852012270245,
152
+ "eval_loss": 1.2149937152862549,
153
+ "eval_precision": 0.5440134711415631,
154
+ "eval_recall": 0.5596330275229358,
155
+ "eval_runtime": 4.3712,
156
+ "eval_samples_per_second": 24.936,
157
+ "eval_steps_per_second": 0.458,
158
  "step": 72
159
  },
160
  {
161
  "epoch": 9.97,
162
+ "learning_rate": 0.00011666666666666667,
163
+ "loss": 1.2371,
164
  "step": 80
165
  },
166
  {
167
  "epoch": 9.97,
168
+ "eval_accuracy": 0.5963302752293578,
169
+ "eval_f1": 0.5100508405791544,
170
+ "eval_loss": 1.1580270528793335,
171
+ "eval_precision": 0.5659333353788879,
172
+ "eval_recall": 0.5963302752293578,
173
+ "eval_runtime": 4.2133,
174
+ "eval_samples_per_second": 25.87,
175
+ "eval_steps_per_second": 0.475,
176
  "step": 80
177
  },
178
  {
179
  "epoch": 10.97,
180
+ "eval_accuracy": 0.6055045871559633,
181
+ "eval_f1": 0.5211004609031267,
182
+ "eval_loss": 1.066982388496399,
183
+ "eval_precision": 0.7279186904119446,
184
+ "eval_recall": 0.6055045871559633,
185
+ "eval_runtime": 14.5277,
186
+ "eval_samples_per_second": 7.503,
187
+ "eval_steps_per_second": 0.138,
188
  "step": 88
189
  },
190
  {
191
  "epoch": 11.24,
192
+ "learning_rate": 0.00011458333333333334,
193
+ "loss": 1.1736,
194
  "step": 90
195
  },
196
  {
197
  "epoch": 11.97,
198
+ "eval_accuracy": 0.6605504587155964,
199
+ "eval_f1": 0.5771675777046106,
200
+ "eval_loss": 0.9855989813804626,
201
+ "eval_precision": 0.5536726213674042,
202
+ "eval_recall": 0.6605504587155964,
203
+ "eval_runtime": 4.2828,
204
+ "eval_samples_per_second": 25.451,
205
+ "eval_steps_per_second": 0.467,
206
  "step": 96
207
  },
208
  {
209
  "epoch": 12.48,
210
+ "learning_rate": 0.0001125,
211
+ "loss": 1.0457,
212
  "step": 100
213
  },
214
  {
215
  "epoch": 12.97,
216
+ "eval_accuracy": 0.6697247706422018,
217
+ "eval_f1": 0.5964926350313968,
218
+ "eval_loss": 0.896264374256134,
219
+ "eval_precision": 0.763141515721791,
220
+ "eval_recall": 0.6697247706422018,
221
+ "eval_runtime": 4.2315,
222
+ "eval_samples_per_second": 25.759,
223
+ "eval_steps_per_second": 0.473,
224
  "step": 104
225
  },
226
  {
227
  "epoch": 13.73,
228
+ "learning_rate": 0.00011041666666666666,
229
+ "loss": 0.953,
230
  "step": 110
231
  },
232
  {
233
  "epoch": 13.97,
234
+ "eval_accuracy": 0.6697247706422018,
235
+ "eval_f1": 0.6081464096911078,
236
+ "eval_loss": 0.8546512722969055,
237
+ "eval_precision": 0.6884856947005512,
238
+ "eval_recall": 0.6697247706422018,
239
+ "eval_runtime": 4.4534,
240
+ "eval_samples_per_second": 24.476,
241
+ "eval_steps_per_second": 0.449,
242
  "step": 112
243
  },
244
  {
245
  "epoch": 14.97,
246
+ "learning_rate": 0.00010833333333333334,
247
+ "loss": 0.8579,
248
  "step": 120
249
  },
250
  {
251
  "epoch": 14.97,
252
+ "eval_accuracy": 0.7155963302752294,
253
+ "eval_f1": 0.6643295304342026,
254
+ "eval_loss": 0.7848823070526123,
255
+ "eval_precision": 0.7396182317656428,
256
+ "eval_recall": 0.7155963302752294,
257
+ "eval_runtime": 4.1662,
258
+ "eval_samples_per_second": 26.163,
259
+ "eval_steps_per_second": 0.48,
260
  "step": 120
261
  },
262
  {
263
  "epoch": 15.97,
264
+ "eval_accuracy": 0.7431192660550459,
265
+ "eval_f1": 0.711852075310986,
266
+ "eval_loss": 0.7563745379447937,
267
+ "eval_precision": 0.7371817784661822,
268
+ "eval_recall": 0.7431192660550459,
269
+ "eval_runtime": 4.2674,
270
+ "eval_samples_per_second": 25.542,
271
+ "eval_steps_per_second": 0.469,
272
  "step": 128
273
  },
274
  {
275
  "epoch": 16.24,
276
+ "learning_rate": 0.00010625,
277
+ "loss": 0.8167,
278
  "step": 130
279
  },
280
  {
281
  "epoch": 16.97,
282
+ "eval_accuracy": 0.7614678899082569,
283
+ "eval_f1": 0.7210716889645992,
284
+ "eval_loss": 0.7132583260536194,
285
+ "eval_precision": 0.7506553079947577,
286
+ "eval_recall": 0.7614678899082569,
287
+ "eval_runtime": 4.1418,
288
+ "eval_samples_per_second": 26.317,
289
+ "eval_steps_per_second": 0.483,
290
  "step": 136
291
  },
292
  {
293
  "epoch": 17.48,
294
+ "learning_rate": 0.00010416666666666667,
295
+ "loss": 0.7273,
296
  "step": 140
297
  },
298
  {
299
  "epoch": 17.97,
300
+ "eval_accuracy": 0.7522935779816514,
301
+ "eval_f1": 0.7202155642522615,
302
+ "eval_loss": 0.6887747645378113,
303
+ "eval_precision": 0.7378685592291271,
304
+ "eval_recall": 0.7522935779816514,
305
+ "eval_runtime": 4.5118,
306
+ "eval_samples_per_second": 24.159,
307
+ "eval_steps_per_second": 0.443,
308
  "step": 144
309
  },
310
  {
311
  "epoch": 18.73,
312
+ "learning_rate": 0.00010208333333333334,
313
+ "loss": 0.6547,
314
  "step": 150
315
  },
316
  {
317
  "epoch": 18.97,
318
+ "eval_accuracy": 0.7798165137614679,
319
+ "eval_f1": 0.7576525411387797,
320
+ "eval_loss": 0.659186065196991,
321
+ "eval_precision": 0.7772903701802784,
322
+ "eval_recall": 0.7798165137614679,
323
+ "eval_runtime": 4.3816,
324
+ "eval_samples_per_second": 24.877,
325
+ "eval_steps_per_second": 0.456,
326
  "step": 152
327
  },
328
  {
329
  "epoch": 19.97,
330
+ "learning_rate": 0.0001,
331
+ "loss": 0.5963,
332
  "step": 160
333
  },
334
  {
335
  "epoch": 19.97,
336
+ "eval_accuracy": 0.7706422018348624,
337
+ "eval_f1": 0.7550663564666461,
338
+ "eval_loss": 0.6136144399642944,
339
+ "eval_precision": 0.764159781184113,
340
+ "eval_recall": 0.7706422018348624,
341
+ "eval_runtime": 4.2561,
342
+ "eval_samples_per_second": 25.61,
343
+ "eval_steps_per_second": 0.47,
344
  "step": 160
345
  },
346
  {
347
+ "epoch": 20.97,
348
+ "eval_accuracy": 0.7889908256880734,
349
+ "eval_f1": 0.7786916064255194,
350
+ "eval_loss": 0.5723462700843811,
351
+ "eval_precision": 0.7801645588430826,
352
+ "eval_recall": 0.7889908256880734,
353
+ "eval_runtime": 5.2546,
354
+ "eval_samples_per_second": 20.744,
355
+ "eval_steps_per_second": 0.381,
356
+ "step": 168
357
+ },
358
+ {
359
+ "epoch": 21.24,
360
+ "learning_rate": 9.791666666666667e-05,
361
+ "loss": 0.551,
362
+ "step": 170
363
+ },
364
+ {
365
+ "epoch": 21.97,
366
+ "eval_accuracy": 0.7889908256880734,
367
+ "eval_f1": 0.7780894219567608,
368
+ "eval_loss": 0.5686229467391968,
369
+ "eval_precision": 0.776092121476891,
370
+ "eval_recall": 0.7889908256880734,
371
+ "eval_runtime": 4.3144,
372
+ "eval_samples_per_second": 25.264,
373
+ "eval_steps_per_second": 0.464,
374
+ "step": 176
375
+ },
376
+ {
377
+ "epoch": 22.48,
378
+ "learning_rate": 9.583333333333334e-05,
379
+ "loss": 0.4929,
380
+ "step": 180
381
+ },
382
+ {
383
+ "epoch": 22.97,
384
+ "eval_accuracy": 0.7706422018348624,
385
+ "eval_f1": 0.7651464296127533,
386
+ "eval_loss": 0.5596823692321777,
387
+ "eval_precision": 0.7649127896435436,
388
+ "eval_recall": 0.7706422018348624,
389
+ "eval_runtime": 4.4326,
390
+ "eval_samples_per_second": 24.59,
391
+ "eval_steps_per_second": 0.451,
392
+ "step": 184
393
+ },
394
+ {
395
+ "epoch": 23.73,
396
+ "learning_rate": 9.375e-05,
397
+ "loss": 0.4309,
398
+ "step": 190
399
+ },
400
+ {
401
+ "epoch": 23.97,
402
+ "eval_accuracy": 0.7889908256880734,
403
+ "eval_f1": 0.781022184850044,
404
+ "eval_loss": 0.5233965516090393,
405
+ "eval_precision": 0.7774110647118855,
406
+ "eval_recall": 0.7889908256880734,
407
+ "eval_runtime": 4.3167,
408
+ "eval_samples_per_second": 25.251,
409
+ "eval_steps_per_second": 0.463,
410
+ "step": 192
411
+ },
412
+ {
413
+ "epoch": 24.97,
414
+ "learning_rate": 9.166666666666667e-05,
415
+ "loss": 0.3945,
416
+ "step": 200
417
+ },
418
+ {
419
+ "epoch": 24.97,
420
+ "eval_accuracy": 0.7889908256880734,
421
+ "eval_f1": 0.7812909567496724,
422
+ "eval_loss": 0.5007840991020203,
423
+ "eval_precision": 0.7836723839914905,
424
+ "eval_recall": 0.7889908256880734,
425
+ "eval_runtime": 4.3513,
426
+ "eval_samples_per_second": 25.05,
427
+ "eval_steps_per_second": 0.46,
428
+ "step": 200
429
+ },
430
+ {
431
+ "epoch": 25.97,
432
+ "eval_accuracy": 0.7522935779816514,
433
+ "eval_f1": 0.7528893125223401,
434
+ "eval_loss": 0.5289302468299866,
435
+ "eval_precision": 0.7537344154316632,
436
+ "eval_recall": 0.7522935779816514,
437
+ "eval_runtime": 4.3647,
438
+ "eval_samples_per_second": 24.973,
439
+ "eval_steps_per_second": 0.458,
440
+ "step": 208
441
+ },
442
+ {
443
+ "epoch": 26.24,
444
+ "learning_rate": 8.958333333333333e-05,
445
+ "loss": 0.3704,
446
+ "step": 210
447
+ },
448
+ {
449
+ "epoch": 26.97,
450
+ "eval_accuracy": 0.7981651376146789,
451
+ "eval_f1": 0.7962706746091569,
452
+ "eval_loss": 0.4399118423461914,
453
+ "eval_precision": 0.7957732392169626,
454
+ "eval_recall": 0.7981651376146789,
455
+ "eval_runtime": 4.2147,
456
+ "eval_samples_per_second": 25.862,
457
+ "eval_steps_per_second": 0.475,
458
+ "step": 216
459
+ },
460
+ {
461
+ "epoch": 27.48,
462
+ "learning_rate": 8.75e-05,
463
+ "loss": 0.3267,
464
+ "step": 220
465
+ },
466
+ {
467
+ "epoch": 27.97,
468
+ "eval_accuracy": 0.8073394495412844,
469
+ "eval_f1": 0.8005365278178692,
470
+ "eval_loss": 0.4539415240287781,
471
+ "eval_precision": 0.7983420088683247,
472
+ "eval_recall": 0.8073394495412844,
473
+ "eval_runtime": 4.2272,
474
+ "eval_samples_per_second": 25.785,
475
+ "eval_steps_per_second": 0.473,
476
+ "step": 224
477
+ },
478
+ {
479
+ "epoch": 28.73,
480
+ "learning_rate": 8.541666666666668e-05,
481
+ "loss": 0.2966,
482
+ "step": 230
483
+ },
484
+ {
485
+ "epoch": 28.97,
486
+ "eval_accuracy": 0.7798165137614679,
487
+ "eval_f1": 0.7837137974455839,
488
+ "eval_loss": 0.4734969735145569,
489
+ "eval_precision": 0.789241175758926,
490
+ "eval_recall": 0.7798165137614679,
491
+ "eval_runtime": 4.2368,
492
+ "eval_samples_per_second": 25.727,
493
+ "eval_steps_per_second": 0.472,
494
+ "step": 232
495
+ },
496
+ {
497
+ "epoch": 29.97,
498
+ "learning_rate": 8.333333333333333e-05,
499
+ "loss": 0.2645,
500
+ "step": 240
501
+ },
502
+ {
503
+ "epoch": 29.97,
504
+ "eval_accuracy": 0.7706422018348624,
505
+ "eval_f1": 0.7706422018348624,
506
+ "eval_loss": 0.4594463109970093,
507
+ "eval_precision": 0.7706422018348624,
508
+ "eval_recall": 0.7706422018348624,
509
+ "eval_runtime": 4.3943,
510
+ "eval_samples_per_second": 24.805,
511
+ "eval_steps_per_second": 0.455,
512
+ "step": 240
513
+ },
514
+ {
515
+ "epoch": 30.97,
516
+ "eval_accuracy": 0.7522935779816514,
517
+ "eval_f1": 0.7533140030125614,
518
+ "eval_loss": 0.4698648750782013,
519
+ "eval_precision": 0.7554437319096274,
520
+ "eval_recall": 0.7522935779816514,
521
+ "eval_runtime": 4.1296,
522
+ "eval_samples_per_second": 26.394,
523
+ "eval_steps_per_second": 0.484,
524
+ "step": 248
525
+ },
526
+ {
527
+ "epoch": 31.24,
528
+ "learning_rate": 8.125000000000001e-05,
529
+ "loss": 0.2527,
530
+ "step": 250
531
+ },
532
+ {
533
+ "epoch": 31.97,
534
+ "eval_accuracy": 0.7889908256880734,
535
+ "eval_f1": 0.7856523955147808,
536
+ "eval_loss": 0.45513755083084106,
537
+ "eval_precision": 0.785609816323817,
538
+ "eval_recall": 0.7889908256880734,
539
+ "eval_runtime": 4.1348,
540
+ "eval_samples_per_second": 26.361,
541
+ "eval_steps_per_second": 0.484,
542
+ "step": 256
543
+ },
544
+ {
545
+ "epoch": 32.48,
546
+ "learning_rate": 7.916666666666666e-05,
547
+ "loss": 0.2202,
548
+ "step": 260
549
+ },
550
+ {
551
+ "epoch": 32.97,
552
+ "eval_accuracy": 0.8165137614678899,
553
+ "eval_f1": 0.8170336224601304,
554
+ "eval_loss": 0.4457748532295227,
555
+ "eval_precision": 0.8197900424266599,
556
+ "eval_recall": 0.8165137614678899,
557
+ "eval_runtime": 5.1689,
558
+ "eval_samples_per_second": 21.087,
559
+ "eval_steps_per_second": 0.387,
560
+ "step": 264
561
+ },
562
+ {
563
+ "epoch": 33.73,
564
+ "learning_rate": 7.708333333333334e-05,
565
+ "loss": 0.2006,
566
+ "step": 270
567
+ },
568
+ {
569
+ "epoch": 33.97,
570
+ "eval_accuracy": 0.7798165137614679,
571
+ "eval_f1": 0.7850412357933325,
572
+ "eval_loss": 0.46321260929107666,
573
+ "eval_precision": 0.7940749153601442,
574
+ "eval_recall": 0.7798165137614679,
575
+ "eval_runtime": 4.2326,
576
+ "eval_samples_per_second": 25.752,
577
+ "eval_steps_per_second": 0.473,
578
+ "step": 272
579
+ },
580
+ {
581
+ "epoch": 34.97,
582
+ "learning_rate": 7.500000000000001e-05,
583
+ "loss": 0.1589,
584
+ "step": 280
585
+ },
586
+ {
587
+ "epoch": 34.97,
588
+ "eval_accuracy": 0.7889908256880734,
589
+ "eval_f1": 0.7925041274199247,
590
+ "eval_loss": 0.46511203050613403,
591
+ "eval_precision": 0.7993275970140749,
592
+ "eval_recall": 0.7889908256880734,
593
+ "eval_runtime": 4.1566,
594
+ "eval_samples_per_second": 26.223,
595
+ "eval_steps_per_second": 0.481,
596
+ "step": 280
597
+ },
598
+ {
599
+ "epoch": 35.97,
600
+ "eval_accuracy": 0.7798165137614679,
601
+ "eval_f1": 0.7803733958918072,
602
+ "eval_loss": 0.45948973298072815,
603
+ "eval_precision": 0.7823930357635982,
604
+ "eval_recall": 0.7798165137614679,
605
+ "eval_runtime": 4.3202,
606
+ "eval_samples_per_second": 25.23,
607
+ "eval_steps_per_second": 0.463,
608
+ "step": 288
609
+ },
610
+ {
611
+ "epoch": 36.24,
612
+ "learning_rate": 7.291666666666666e-05,
613
+ "loss": 0.153,
614
+ "step": 290
615
+ },
616
+ {
617
+ "epoch": 36.97,
618
+ "eval_accuracy": 0.7614678899082569,
619
+ "eval_f1": 0.7632546001327497,
620
+ "eval_loss": 0.458363801240921,
621
+ "eval_precision": 0.7690950187958565,
622
+ "eval_recall": 0.7614678899082569,
623
+ "eval_runtime": 4.4275,
624
+ "eval_samples_per_second": 24.619,
625
+ "eval_steps_per_second": 0.452,
626
+ "step": 296
627
+ },
628
+ {
629
+ "epoch": 37.48,
630
+ "learning_rate": 7.083333333333334e-05,
631
+ "loss": 0.1427,
632
+ "step": 300
633
+ },
634
+ {
635
+ "epoch": 37.97,
636
+ "eval_accuracy": 0.7798165137614679,
637
+ "eval_f1": 0.7796360891921739,
638
+ "eval_loss": 0.46078726649284363,
639
+ "eval_precision": 0.782995656118911,
640
+ "eval_recall": 0.7798165137614679,
641
+ "eval_runtime": 4.4461,
642
+ "eval_samples_per_second": 24.516,
643
+ "eval_steps_per_second": 0.45,
644
+ "step": 304
645
+ },
646
+ {
647
+ "epoch": 38.73,
648
+ "learning_rate": 6.874999999999999e-05,
649
+ "loss": 0.113,
650
+ "step": 310
651
+ },
652
+ {
653
+ "epoch": 38.97,
654
+ "eval_accuracy": 0.7889908256880734,
655
+ "eval_f1": 0.7898954921921011,
656
+ "eval_loss": 0.45713570713996887,
657
+ "eval_precision": 0.7922270390160299,
658
+ "eval_recall": 0.7889908256880734,
659
+ "eval_runtime": 4.3087,
660
+ "eval_samples_per_second": 25.297,
661
+ "eval_steps_per_second": 0.464,
662
+ "step": 312
663
+ },
664
+ {
665
+ "epoch": 39.97,
666
+ "learning_rate": 6.666666666666667e-05,
667
+ "loss": 0.1146,
668
+ "step": 320
669
+ },
670
+ {
671
+ "epoch": 39.97,
672
+ "eval_accuracy": 0.7614678899082569,
673
+ "eval_f1": 0.7612891030218949,
674
+ "eval_loss": 0.5269873142242432,
675
+ "eval_precision": 0.765128347697155,
676
+ "eval_recall": 0.7614678899082569,
677
+ "eval_runtime": 4.2908,
678
+ "eval_samples_per_second": 25.403,
679
+ "eval_steps_per_second": 0.466,
680
+ "step": 320
681
+ },
682
+ {
683
+ "epoch": 40.97,
684
+ "eval_accuracy": 0.7706422018348624,
685
+ "eval_f1": 0.7709516996486127,
686
+ "eval_loss": 0.48878130316734314,
687
+ "eval_precision": 0.7781771515945828,
688
+ "eval_recall": 0.7706422018348624,
689
+ "eval_runtime": 4.3303,
690
+ "eval_samples_per_second": 25.171,
691
+ "eval_steps_per_second": 0.462,
692
+ "step": 328
693
+ },
694
+ {
695
+ "epoch": 41.24,
696
+ "learning_rate": 6.458333333333334e-05,
697
+ "loss": 0.1275,
698
+ "step": 330
699
+ },
700
+ {
701
+ "epoch": 41.97,
702
+ "eval_accuracy": 0.7889908256880734,
703
+ "eval_f1": 0.7836706749228535,
704
+ "eval_loss": 0.4523099958896637,
705
+ "eval_precision": 0.780905883107718,
706
+ "eval_recall": 0.7889908256880734,
707
+ "eval_runtime": 4.2033,
708
+ "eval_samples_per_second": 25.932,
709
+ "eval_steps_per_second": 0.476,
710
+ "step": 336
711
+ },
712
+ {
713
+ "epoch": 42.48,
714
+ "learning_rate": 6.25e-05,
715
+ "loss": 0.0959,
716
+ "step": 340
717
+ },
718
+ {
719
+ "epoch": 42.97,
720
+ "eval_accuracy": 0.7798165137614679,
721
+ "eval_f1": 0.7767063396275014,
722
+ "eval_loss": 0.46965503692626953,
723
+ "eval_precision": 0.7753268138589239,
724
+ "eval_recall": 0.7798165137614679,
725
+ "eval_runtime": 4.3242,
726
+ "eval_samples_per_second": 25.207,
727
+ "eval_steps_per_second": 0.463,
728
+ "step": 344
729
+ },
730
+ {
731
+ "epoch": 43.73,
732
+ "learning_rate": 6.041666666666667e-05,
733
+ "loss": 0.0882,
734
+ "step": 350
735
+ },
736
+ {
737
+ "epoch": 43.97,
738
+ "eval_accuracy": 0.7706422018348624,
739
+ "eval_f1": 0.7685729724992435,
740
+ "eval_loss": 0.4286104738712311,
741
+ "eval_precision": 0.7685932721712537,
742
+ "eval_recall": 0.7706422018348624,
743
+ "eval_runtime": 4.4175,
744
+ "eval_samples_per_second": 24.675,
745
+ "eval_steps_per_second": 0.453,
746
+ "step": 352
747
+ },
748
+ {
749
+ "epoch": 44.97,
750
+ "learning_rate": 5.833333333333333e-05,
751
+ "loss": 0.0847,
752
+ "step": 360
753
+ },
754
+ {
755
+ "epoch": 44.97,
756
+ "eval_accuracy": 0.7889908256880734,
757
+ "eval_f1": 0.7925041274199247,
758
+ "eval_loss": 0.5317460298538208,
759
+ "eval_precision": 0.7993275970140749,
760
+ "eval_recall": 0.7889908256880734,
761
+ "eval_runtime": 6.0042,
762
+ "eval_samples_per_second": 18.154,
763
+ "eval_steps_per_second": 0.333,
764
+ "step": 360
765
+ },
766
+ {
767
+ "epoch": 45.97,
768
+ "eval_accuracy": 0.7614678899082569,
769
+ "eval_f1": 0.7646893115457605,
770
+ "eval_loss": 0.5431071519851685,
771
+ "eval_precision": 0.7699552364490537,
772
+ "eval_recall": 0.7614678899082569,
773
+ "eval_runtime": 4.3547,
774
+ "eval_samples_per_second": 25.03,
775
+ "eval_steps_per_second": 0.459,
776
+ "step": 368
777
+ },
778
+ {
779
+ "epoch": 46.24,
780
+ "learning_rate": 5.625e-05,
781
+ "loss": 0.0813,
782
+ "step": 370
783
+ },
784
+ {
785
+ "epoch": 46.97,
786
+ "eval_accuracy": 0.8256880733944955,
787
+ "eval_f1": 0.8284158367266842,
788
+ "eval_loss": 0.44316479563713074,
789
+ "eval_precision": 0.843538901662607,
790
+ "eval_recall": 0.8256880733944955,
791
+ "eval_runtime": 4.2714,
792
+ "eval_samples_per_second": 25.519,
793
+ "eval_steps_per_second": 0.468,
794
+ "step": 376
795
+ },
796
+ {
797
+ "epoch": 47.48,
798
+ "learning_rate": 5.416666666666667e-05,
799
+ "loss": 0.0768,
800
+ "step": 380
801
+ },
802
+ {
803
+ "epoch": 47.97,
804
+ "eval_accuracy": 0.7981651376146789,
805
+ "eval_f1": 0.7955546490941502,
806
+ "eval_loss": 0.4885597229003906,
807
+ "eval_precision": 0.8005162605636117,
808
+ "eval_recall": 0.7981651376146789,
809
+ "eval_runtime": 4.2576,
810
+ "eval_samples_per_second": 25.602,
811
+ "eval_steps_per_second": 0.47,
812
+ "step": 384
813
+ },
814
+ {
815
+ "epoch": 48.73,
816
+ "learning_rate": 5.208333333333334e-05,
817
+ "loss": 0.0627,
818
+ "step": 390
819
+ },
820
+ {
821
+ "epoch": 48.97,
822
+ "eval_accuracy": 0.7981651376146789,
823
+ "eval_f1": 0.8009727230444837,
824
+ "eval_loss": 0.5372528433799744,
825
+ "eval_precision": 0.8071912929511652,
826
+ "eval_recall": 0.7981651376146789,
827
+ "eval_runtime": 4.3763,
828
+ "eval_samples_per_second": 24.907,
829
+ "eval_steps_per_second": 0.457,
830
+ "step": 392
831
+ },
832
+ {
833
+ "epoch": 49.97,
834
+ "learning_rate": 5e-05,
835
+ "loss": 0.0688,
836
+ "step": 400
837
+ },
838
+ {
839
+ "epoch": 49.97,
840
+ "eval_accuracy": 0.7798165137614679,
841
+ "eval_f1": 0.7822443312570955,
842
+ "eval_loss": 0.5896694660186768,
843
+ "eval_precision": 0.789232725399693,
844
+ "eval_recall": 0.7798165137614679,
845
+ "eval_runtime": 4.3692,
846
+ "eval_samples_per_second": 24.947,
847
+ "eval_steps_per_second": 0.458,
848
+ "step": 400
849
+ },
850
+ {
851
+ "epoch": 50.97,
852
+ "eval_accuracy": 0.7981651376146789,
853
+ "eval_f1": 0.7992371326578404,
854
+ "eval_loss": 0.5114642977714539,
855
+ "eval_precision": 0.8014849021031709,
856
+ "eval_recall": 0.7981651376146789,
857
+ "eval_runtime": 4.3028,
858
+ "eval_samples_per_second": 25.332,
859
+ "eval_steps_per_second": 0.465,
860
+ "step": 408
861
+ },
862
+ {
863
+ "epoch": 51.24,
864
+ "learning_rate": 4.791666666666667e-05,
865
+ "loss": 0.0676,
866
+ "step": 410
867
+ },
868
+ {
869
+ "epoch": 51.97,
870
+ "eval_accuracy": 0.7981651376146789,
871
+ "eval_f1": 0.7977993643940755,
872
+ "eval_loss": 0.4881470799446106,
873
+ "eval_precision": 0.7998301588132414,
874
+ "eval_recall": 0.7981651376146789,
875
+ "eval_runtime": 4.3212,
876
+ "eval_samples_per_second": 25.224,
877
+ "eval_steps_per_second": 0.463,
878
+ "step": 416
879
+ },
880
+ {
881
+ "epoch": 52.48,
882
+ "learning_rate": 4.5833333333333334e-05,
883
+ "loss": 0.0539,
884
+ "step": 420
885
+ },
886
+ {
887
+ "epoch": 52.97,
888
+ "eval_accuracy": 0.8073394495412844,
889
+ "eval_f1": 0.807691055721277,
890
+ "eval_loss": 0.4819609224796295,
891
+ "eval_precision": 0.8138503399273244,
892
+ "eval_recall": 0.8073394495412844,
893
+ "eval_runtime": 4.3125,
894
+ "eval_samples_per_second": 25.276,
895
+ "eval_steps_per_second": 0.464,
896
+ "step": 424
897
+ },
898
+ {
899
+ "epoch": 53.73,
900
+ "learning_rate": 4.375e-05,
901
+ "loss": 0.0596,
902
+ "step": 430
903
+ },
904
+ {
905
+ "epoch": 53.97,
906
+ "eval_accuracy": 0.8256880733944955,
907
+ "eval_f1": 0.8244465797713664,
908
+ "eval_loss": 0.4449571669101715,
909
+ "eval_precision": 0.8245908379614004,
910
+ "eval_recall": 0.8256880733944955,
911
+ "eval_runtime": 5.8195,
912
+ "eval_samples_per_second": 18.73,
913
+ "eval_steps_per_second": 0.344,
914
+ "step": 432
915
+ },
916
+ {
917
+ "epoch": 54.97,
918
+ "learning_rate": 4.1666666666666665e-05,
919
+ "loss": 0.0611,
920
+ "step": 440
921
+ },
922
+ {
923
+ "epoch": 54.97,
924
+ "eval_accuracy": 0.7889908256880734,
925
+ "eval_f1": 0.7924465793472197,
926
+ "eval_loss": 0.5057494044303894,
927
+ "eval_precision": 0.8008019735410222,
928
+ "eval_recall": 0.7889908256880734,
929
+ "eval_runtime": 4.6616,
930
+ "eval_samples_per_second": 23.383,
931
+ "eval_steps_per_second": 0.429,
932
+ "step": 440
933
+ },
934
+ {
935
+ "epoch": 55.97,
936
+ "eval_accuracy": 0.7981651376146789,
937
+ "eval_f1": 0.8008031371900772,
938
+ "eval_loss": 0.4918478727340698,
939
+ "eval_precision": 0.8056331161636306,
940
+ "eval_recall": 0.7981651376146789,
941
+ "eval_runtime": 4.8639,
942
+ "eval_samples_per_second": 22.41,
943
+ "eval_steps_per_second": 0.411,
944
+ "step": 448
945
+ },
946
+ {
947
+ "epoch": 56.24,
948
+ "learning_rate": 3.958333333333333e-05,
949
+ "loss": 0.0643,
950
+ "step": 450
951
+ },
952
+ {
953
+ "epoch": 56.97,
954
+ "eval_accuracy": 0.7522935779816514,
955
+ "eval_f1": 0.7545272459450874,
956
+ "eval_loss": 0.5946044325828552,
957
+ "eval_precision": 0.7587394976605187,
958
+ "eval_recall": 0.7522935779816514,
959
+ "eval_runtime": 4.4153,
960
+ "eval_samples_per_second": 24.687,
961
+ "eval_steps_per_second": 0.453,
962
+ "step": 456
963
+ },
964
+ {
965
+ "epoch": 57.48,
966
+ "learning_rate": 3.7500000000000003e-05,
967
+ "loss": 0.0605,
968
+ "step": 460
969
+ },
970
+ {
971
+ "epoch": 57.97,
972
+ "eval_accuracy": 0.8073394495412844,
973
+ "eval_f1": 0.8120936083458042,
974
+ "eval_loss": 0.4887966513633728,
975
+ "eval_precision": 0.8238645162528876,
976
+ "eval_recall": 0.8073394495412844,
977
+ "eval_runtime": 6.6734,
978
+ "eval_samples_per_second": 16.334,
979
+ "eval_steps_per_second": 0.3,
980
+ "step": 464
981
+ },
982
+ {
983
+ "epoch": 58.73,
984
+ "learning_rate": 3.541666666666667e-05,
985
+ "loss": 0.063,
986
+ "step": 470
987
+ },
988
+ {
989
+ "epoch": 58.97,
990
+ "eval_accuracy": 0.7889908256880734,
991
+ "eval_f1": 0.7936673558168583,
992
+ "eval_loss": 0.5916518568992615,
993
+ "eval_precision": 0.8051419661311963,
994
+ "eval_recall": 0.7889908256880734,
995
+ "eval_runtime": 4.2726,
996
+ "eval_samples_per_second": 25.511,
997
+ "eval_steps_per_second": 0.468,
998
+ "step": 472
999
+ },
1000
+ {
1001
+ "epoch": 59.97,
1002
+ "learning_rate": 3.3333333333333335e-05,
1003
+ "loss": 0.0595,
1004
+ "step": 480
1005
+ },
1006
+ {
1007
+ "epoch": 59.97,
1008
+ "eval_accuracy": 0.7889908256880734,
1009
+ "eval_f1": 0.7894285298217145,
1010
+ "eval_loss": 0.5117025375366211,
1011
+ "eval_precision": 0.7904302906815871,
1012
+ "eval_recall": 0.7889908256880734,
1013
+ "eval_runtime": 4.4288,
1014
+ "eval_samples_per_second": 24.612,
1015
+ "eval_steps_per_second": 0.452,
1016
+ "step": 480
1017
+ },
1018
+ {
1019
+ "epoch": 60.97,
1020
+ "eval_accuracy": 0.7614678899082569,
1021
+ "eval_f1": 0.7635438011331693,
1022
+ "eval_loss": 0.549720048904419,
1023
+ "eval_precision": 0.769159484755815,
1024
+ "eval_recall": 0.7614678899082569,
1025
+ "eval_runtime": 4.2635,
1026
+ "eval_samples_per_second": 25.566,
1027
+ "eval_steps_per_second": 0.469,
1028
+ "step": 488
1029
+ },
1030
+ {
1031
+ "epoch": 61.24,
1032
+ "learning_rate": 3.125e-05,
1033
+ "loss": 0.0554,
1034
+ "step": 490
1035
+ },
1036
+ {
1037
+ "epoch": 61.97,
1038
+ "eval_accuracy": 0.8165137614678899,
1039
+ "eval_f1": 0.812552773801982,
1040
+ "eval_loss": 0.4742366075515747,
1041
+ "eval_precision": 0.8100926678908328,
1042
+ "eval_recall": 0.8165137614678899,
1043
+ "eval_runtime": 4.3801,
1044
+ "eval_samples_per_second": 24.885,
1045
+ "eval_steps_per_second": 0.457,
1046
+ "step": 496
1047
+ },
1048
+ {
1049
+ "epoch": 62.48,
1050
+ "learning_rate": 2.9166666666666666e-05,
1051
+ "loss": 0.0557,
1052
+ "step": 500
1053
+ },
1054
+ {
1055
+ "epoch": 62.97,
1056
+ "eval_accuracy": 0.7889908256880734,
1057
+ "eval_f1": 0.7886246853481453,
1058
+ "eval_loss": 0.5369319915771484,
1059
+ "eval_precision": 0.7886251828214332,
1060
+ "eval_recall": 0.7889908256880734,
1061
+ "eval_runtime": 4.4776,
1062
+ "eval_samples_per_second": 24.343,
1063
+ "eval_steps_per_second": 0.447,
1064
+ "step": 504
1065
+ },
1066
+ {
1067
+ "epoch": 63.73,
1068
+ "learning_rate": 2.7083333333333335e-05,
1069
+ "loss": 0.0539,
1070
+ "step": 510
1071
+ },
1072
+ {
1073
+ "epoch": 63.97,
1074
+ "eval_accuracy": 0.7889908256880734,
1075
+ "eval_f1": 0.7898954921921011,
1076
+ "eval_loss": 0.5439756512641907,
1077
+ "eval_precision": 0.7922270390160299,
1078
+ "eval_recall": 0.7889908256880734,
1079
+ "eval_runtime": 4.4228,
1080
+ "eval_samples_per_second": 24.645,
1081
+ "eval_steps_per_second": 0.452,
1082
+ "step": 512
1083
+ },
1084
+ {
1085
+ "epoch": 64.97,
1086
+ "learning_rate": 2.5e-05,
1087
+ "loss": 0.048,
1088
+ "step": 520
1089
+ },
1090
+ {
1091
+ "epoch": 64.97,
1092
+ "eval_accuracy": 0.7889908256880734,
1093
+ "eval_f1": 0.7882784673502429,
1094
+ "eval_loss": 0.5923751592636108,
1095
+ "eval_precision": 0.7878178107535906,
1096
+ "eval_recall": 0.7889908256880734,
1097
+ "eval_runtime": 5.5392,
1098
+ "eval_samples_per_second": 19.678,
1099
+ "eval_steps_per_second": 0.361,
1100
+ "step": 520
1101
+ },
1102
+ {
1103
+ "epoch": 65.97,
1104
+ "eval_accuracy": 0.8440366972477065,
1105
+ "eval_f1": 0.8440366972477065,
1106
+ "eval_loss": 0.486260324716568,
1107
+ "eval_precision": 0.8440366972477065,
1108
+ "eval_recall": 0.8440366972477065,
1109
+ "eval_runtime": 4.2084,
1110
+ "eval_samples_per_second": 25.9,
1111
+ "eval_steps_per_second": 0.475,
1112
+ "step": 528
1113
+ },
1114
+ {
1115
+ "epoch": 66.24,
1116
+ "learning_rate": 2.2916666666666667e-05,
1117
+ "loss": 0.045,
1118
+ "step": 530
1119
+ },
1120
+ {
1121
+ "epoch": 66.97,
1122
+ "eval_accuracy": 0.8073394495412844,
1123
+ "eval_f1": 0.8046694749207712,
1124
+ "eval_loss": 0.5849686861038208,
1125
+ "eval_precision": 0.8076432996157767,
1126
+ "eval_recall": 0.8073394495412844,
1127
+ "eval_runtime": 4.3405,
1128
+ "eval_samples_per_second": 25.112,
1129
+ "eval_steps_per_second": 0.461,
1130
+ "step": 536
1131
+ },
1132
+ {
1133
+ "epoch": 67.48,
1134
+ "learning_rate": 2.0833333333333333e-05,
1135
+ "loss": 0.047,
1136
+ "step": 540
1137
+ },
1138
+ {
1139
+ "epoch": 67.97,
1140
+ "eval_accuracy": 0.8256880733944955,
1141
+ "eval_f1": 0.8227139701424718,
1142
+ "eval_loss": 0.4938836991786957,
1143
+ "eval_precision": 0.8211639886949021,
1144
+ "eval_recall": 0.8256880733944955,
1145
+ "eval_runtime": 4.2943,
1146
+ "eval_samples_per_second": 25.382,
1147
+ "eval_steps_per_second": 0.466,
1148
+ "step": 544
1149
+ },
1150
+ {
1151
+ "epoch": 68.73,
1152
+ "learning_rate": 1.8750000000000002e-05,
1153
+ "loss": 0.0412,
1154
+ "step": 550
1155
+ },
1156
+ {
1157
+ "epoch": 68.97,
1158
+ "eval_accuracy": 0.7889908256880734,
1159
+ "eval_f1": 0.7899819010172877,
1160
+ "eval_loss": 0.4850451946258545,
1161
+ "eval_precision": 0.7911629060252914,
1162
+ "eval_recall": 0.7889908256880734,
1163
+ "eval_runtime": 4.3733,
1164
+ "eval_samples_per_second": 24.924,
1165
+ "eval_steps_per_second": 0.457,
1166
+ "step": 552
1167
+ },
1168
+ {
1169
+ "epoch": 69.97,
1170
+ "learning_rate": 1.6666666666666667e-05,
1171
+ "loss": 0.0392,
1172
+ "step": 560
1173
+ },
1174
+ {
1175
+ "epoch": 69.97,
1176
+ "eval_accuracy": 0.8256880733944955,
1177
+ "eval_f1": 0.8257529642156887,
1178
+ "eval_loss": 0.5065794587135315,
1179
+ "eval_precision": 0.8264957264957264,
1180
+ "eval_recall": 0.8256880733944955,
1181
+ "eval_runtime": 4.2901,
1182
+ "eval_samples_per_second": 25.407,
1183
+ "eval_steps_per_second": 0.466,
1184
+ "step": 560
1185
+ },
1186
+ {
1187
+ "epoch": 70.97,
1188
+ "eval_accuracy": 0.8073394495412844,
1189
+ "eval_f1": 0.8058135764557783,
1190
+ "eval_loss": 0.49649733304977417,
1191
+ "eval_precision": 0.8053007135575942,
1192
+ "eval_recall": 0.8073394495412844,
1193
+ "eval_runtime": 4.1424,
1194
+ "eval_samples_per_second": 26.313,
1195
+ "eval_steps_per_second": 0.483,
1196
+ "step": 568
1197
+ },
1198
+ {
1199
+ "epoch": 71.24,
1200
+ "learning_rate": 1.4583333333333333e-05,
1201
+ "loss": 0.0423,
1202
+ "step": 570
1203
+ },
1204
+ {
1205
+ "epoch": 71.97,
1206
+ "eval_accuracy": 0.8348623853211009,
1207
+ "eval_f1": 0.8350885030999028,
1208
+ "eval_loss": 0.47168704867362976,
1209
+ "eval_precision": 0.8376424034680915,
1210
+ "eval_recall": 0.8348623853211009,
1211
+ "eval_runtime": 4.1897,
1212
+ "eval_samples_per_second": 26.016,
1213
+ "eval_steps_per_second": 0.477,
1214
+ "step": 576
1215
+ },
1216
+ {
1217
+ "epoch": 72.48,
1218
+ "learning_rate": 1.25e-05,
1219
+ "loss": 0.0471,
1220
+ "step": 580
1221
+ },
1222
+ {
1223
+ "epoch": 72.97,
1224
+ "eval_accuracy": 0.8256880733944955,
1225
+ "eval_f1": 0.8295520158325865,
1226
+ "eval_loss": 0.48450949788093567,
1227
+ "eval_precision": 0.837811160528748,
1228
+ "eval_recall": 0.8256880733944955,
1229
+ "eval_runtime": 4.2446,
1230
+ "eval_samples_per_second": 25.68,
1231
+ "eval_steps_per_second": 0.471,
1232
+ "step": 584
1233
+ },
1234
+ {
1235
+ "epoch": 73.73,
1236
+ "learning_rate": 1.0416666666666666e-05,
1237
+ "loss": 0.0322,
1238
+ "step": 590
1239
+ },
1240
+ {
1241
+ "epoch": 73.97,
1242
+ "eval_accuracy": 0.7706422018348624,
1243
+ "eval_f1": 0.7692886041532689,
1244
+ "eval_loss": 0.5188109278678894,
1245
+ "eval_precision": 0.7689271840647987,
1246
+ "eval_recall": 0.7706422018348624,
1247
+ "eval_runtime": 4.4833,
1248
+ "eval_samples_per_second": 24.313,
1249
+ "eval_steps_per_second": 0.446,
1250
+ "step": 592
1251
+ },
1252
+ {
1253
+ "epoch": 74.97,
1254
+ "learning_rate": 8.333333333333334e-06,
1255
+ "loss": 0.042,
1256
+ "step": 600
1257
+ },
1258
+ {
1259
+ "epoch": 74.97,
1260
+ "eval_accuracy": 0.7706422018348624,
1261
+ "eval_f1": 0.7700888306392893,
1262
+ "eval_loss": 0.5242283940315247,
1263
+ "eval_precision": 0.7698942959712803,
1264
+ "eval_recall": 0.7706422018348624,
1265
+ "eval_runtime": 4.4923,
1266
+ "eval_samples_per_second": 24.264,
1267
+ "eval_steps_per_second": 0.445,
1268
+ "step": 600
1269
+ },
1270
+ {
1271
+ "epoch": 75.97,
1272
+ "eval_accuracy": 0.7798165137614679,
1273
+ "eval_f1": 0.7803733958918072,
1274
+ "eval_loss": 0.5945499539375305,
1275
+ "eval_precision": 0.7823930357635982,
1276
+ "eval_recall": 0.7798165137614679,
1277
+ "eval_runtime": 5.1633,
1278
+ "eval_samples_per_second": 21.111,
1279
+ "eval_steps_per_second": 0.387,
1280
+ "step": 608
1281
+ },
1282
+ {
1283
+ "epoch": 76.24,
1284
+ "learning_rate": 6.25e-06,
1285
+ "loss": 0.0416,
1286
+ "step": 610
1287
+ },
1288
+ {
1289
+ "epoch": 76.97,
1290
+ "eval_accuracy": 0.7981651376146789,
1291
+ "eval_f1": 0.7993445642022374,
1292
+ "eval_loss": 0.5432426929473877,
1293
+ "eval_precision": 0.8038185145085823,
1294
+ "eval_recall": 0.7981651376146789,
1295
+ "eval_runtime": 4.2234,
1296
+ "eval_samples_per_second": 25.809,
1297
+ "eval_steps_per_second": 0.474,
1298
+ "step": 616
1299
+ },
1300
+ {
1301
+ "epoch": 77.48,
1302
+ "learning_rate": 4.166666666666667e-06,
1303
+ "loss": 0.0399,
1304
+ "step": 620
1305
+ },
1306
+ {
1307
+ "epoch": 77.97,
1308
+ "eval_accuracy": 0.7981651376146789,
1309
+ "eval_f1": 0.7994120087594209,
1310
+ "eval_loss": 0.5381362438201904,
1311
+ "eval_precision": 0.8072242548450546,
1312
+ "eval_recall": 0.7981651376146789,
1313
+ "eval_runtime": 4.2152,
1314
+ "eval_samples_per_second": 25.859,
1315
+ "eval_steps_per_second": 0.474,
1316
+ "step": 624
1317
+ },
1318
+ {
1319
+ "epoch": 78.73,
1320
+ "learning_rate": 2.0833333333333334e-06,
1321
+ "loss": 0.0439,
1322
+ "step": 630
1323
+ },
1324
+ {
1325
+ "epoch": 78.97,
1326
+ "eval_accuracy": 0.7798165137614679,
1327
+ "eval_f1": 0.7827462243679189,
1328
+ "eval_loss": 0.6181262135505676,
1329
+ "eval_precision": 0.7877941763063422,
1330
+ "eval_recall": 0.7798165137614679,
1331
+ "eval_runtime": 4.2424,
1332
+ "eval_samples_per_second": 25.693,
1333
+ "eval_steps_per_second": 0.471,
1334
+ "step": 632
1335
+ },
1336
+ {
1337
+ "epoch": 79.97,
1338
+ "learning_rate": 0.0,
1339
+ "loss": 0.0462,
1340
+ "step": 640
1341
+ },
1342
+ {
1343
+ "epoch": 79.97,
1344
+ "eval_accuracy": 0.8165137614678899,
1345
+ "eval_f1": 0.8172526992448356,
1346
+ "eval_loss": 0.48008766770362854,
1347
+ "eval_precision": 0.8181998512273742,
1348
+ "eval_recall": 0.8165137614678899,
1349
+ "eval_runtime": 4.18,
1350
+ "eval_samples_per_second": 26.076,
1351
+ "eval_steps_per_second": 0.478,
1352
+ "step": 640
1353
+ },
1354
+ {
1355
+ "epoch": 79.97,
1356
+ "step": 640,
1357
+ "total_flos": 3.5167284631649157e+18,
1358
+ "train_loss": 0.37728101573884487,
1359
+ "train_runtime": 2345.4183,
1360
+ "train_samples_per_second": 70.606,
1361
+ "train_steps_per_second": 0.273
1362
  }
1363
  ],
1364
+ "max_steps": 640,
1365
+ "num_train_epochs": 80,
1366
+ "total_flos": 3.5167284631649157e+18,
1367
  "trial_name": null,
1368
  "trial_params": null
1369
  }