tiennguyenbnbk committed on
Commit
84402d3
1 Parent(s): d6a34bb

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -5
  2. all_results.json +16 -0
  3. test_results.json +11 -0
  4. train_results.json +8 -0
  5. trainer_state.json +600 -0
README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [vinai/phobert-base-v2](https://huggingface.co/vinai/phobert-base-v2) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.3110
22
- - Accuracy: 0.9329
23
- - F1 Score: 0.9237
24
- - Recall: 0.9279
25
- - Precision: 0.9198
26
 
27
  ## Model description
28
 
 
18
 
19
  This model is a fine-tuned version of [vinai/phobert-base-v2](https://huggingface.co/vinai/phobert-base-v2) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.2873
22
+ - Accuracy: 0.9323
23
+ - F1 Score: 0.9262
24
+ - Recall: 0.9217
25
+ - Precision: 0.9320
26
 
27
  ## Model description
28
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 26.08695652173913,
3
+ "eval_accuracy": 0.9323185648273987,
4
+ "eval_f1_score": 0.9261695691084951,
5
+ "eval_loss": 0.2873239815235138,
6
+ "eval_precision": 0.9319834922740233,
7
+ "eval_recall": 0.9216726996777184,
8
+ "eval_runtime": 6.6463,
9
+ "eval_samples_per_second": 553.538,
10
+ "eval_steps_per_second": 8.727,
11
+ "total_flos": 6579999363349350.0,
12
+ "train_loss": 0.31640464369455973,
13
+ "train_runtime": 3238.6183,
14
+ "train_samples_per_second": 158.092,
15
+ "train_steps_per_second": 1.235
16
+ }
test_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 26.08695652173913,
3
+ "eval_accuracy": 0.9323185648273987,
4
+ "eval_f1_score": 0.9261695691084951,
5
+ "eval_loss": 0.2873239815235138,
6
+ "eval_precision": 0.9319834922740233,
7
+ "eval_recall": 0.9216726996777184,
8
+ "eval_runtime": 6.6463,
9
+ "eval_samples_per_second": 553.538,
10
+ "eval_steps_per_second": 8.727
11
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 26.08695652173913,
3
+ "total_flos": 6579999363349350.0,
4
+ "train_loss": 0.31640464369455973,
5
+ "train_runtime": 3238.6183,
6
+ "train_samples_per_second": 158.092,
7
+ "train_steps_per_second": 1.235
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9261695691084951,
3
+ "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2.1/checkpoint-2000",
4
+ "epoch": 26.08695652173913,
5
+ "eval_steps": 100,
6
+ "global_step": 3000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8695652173913043,
13
+ "grad_norm": 2.4986989498138428,
14
+ "learning_rate": 2.5e-06,
15
+ "loss": 1.8947,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.8695652173913043,
20
+ "eval_accuracy": 0.4001087251970644,
21
+ "eval_f1_score": 0.08320411950694513,
22
+ "eval_loss": 1.68748140335083,
23
+ "eval_precision": 0.14637409036074248,
24
+ "eval_recall": 0.14367816091954025,
25
+ "eval_runtime": 6.6876,
26
+ "eval_samples_per_second": 550.125,
27
+ "eval_steps_per_second": 8.673,
28
+ "step": 100
29
+ },
30
+ {
31
+ "epoch": 1.7391304347826086,
32
+ "grad_norm": 1.596021294593811,
33
+ "learning_rate": 5e-06,
34
+ "loss": 1.5395,
35
+ "step": 200
36
+ },
37
+ {
38
+ "epoch": 1.7391304347826086,
39
+ "eval_accuracy": 0.5849415602065778,
40
+ "eval_f1_score": 0.2355807809182458,
41
+ "eval_loss": 1.2897096872329712,
42
+ "eval_precision": 0.27516139357553443,
43
+ "eval_recall": 0.26320585050663253,
44
+ "eval_runtime": 6.7112,
45
+ "eval_samples_per_second": 548.189,
46
+ "eval_steps_per_second": 8.642,
47
+ "step": 200
48
+ },
49
+ {
50
+ "epoch": 2.608695652173913,
51
+ "grad_norm": 5.161496162414551,
52
+ "learning_rate": 7.500000000000001e-06,
53
+ "loss": 1.1205,
54
+ "step": 300
55
+ },
56
+ {
57
+ "epoch": 2.608695652173913,
58
+ "eval_accuracy": 0.7999456374014677,
59
+ "eval_f1_score": 0.5833178086765388,
60
+ "eval_loss": 0.8468331098556519,
61
+ "eval_precision": 0.5889764394952819,
62
+ "eval_recall": 0.5810488238671468,
63
+ "eval_runtime": 6.6686,
64
+ "eval_samples_per_second": 551.694,
65
+ "eval_steps_per_second": 8.698,
66
+ "step": 300
67
+ },
68
+ {
69
+ "epoch": 3.4782608695652173,
70
+ "grad_norm": 5.227330207824707,
71
+ "learning_rate": 1e-05,
72
+ "loss": 0.82,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 3.4782608695652173,
77
+ "eval_accuracy": 0.8369122044033704,
78
+ "eval_f1_score": 0.6179371343772609,
79
+ "eval_loss": 0.6537477374076843,
80
+ "eval_precision": 0.6062100200393906,
81
+ "eval_recall": 0.6355302315827523,
82
+ "eval_runtime": 6.728,
83
+ "eval_samples_per_second": 546.823,
84
+ "eval_steps_per_second": 8.621,
85
+ "step": 400
86
+ },
87
+ {
88
+ "epoch": 4.3478260869565215,
89
+ "grad_norm": 5.6816534996032715,
90
+ "learning_rate": 9.722222222222223e-06,
91
+ "loss": 0.6232,
92
+ "step": 500
93
+ },
94
+ {
95
+ "epoch": 4.3478260869565215,
96
+ "eval_accuracy": 0.8537646099483556,
97
+ "eval_f1_score": 0.633743239294036,
98
+ "eval_loss": 0.537100613117218,
99
+ "eval_precision": 0.7525070200257705,
100
+ "eval_recall": 0.6518017678843925,
101
+ "eval_runtime": 6.7932,
102
+ "eval_samples_per_second": 541.573,
103
+ "eval_steps_per_second": 8.538,
104
+ "step": 500
105
+ },
106
+ {
107
+ "epoch": 5.217391304347826,
108
+ "grad_norm": 5.096814155578613,
109
+ "learning_rate": 9.444444444444445e-06,
110
+ "loss": 0.5148,
111
+ "step": 600
112
+ },
113
+ {
114
+ "epoch": 5.217391304347826,
115
+ "eval_accuracy": 0.872791519434629,
116
+ "eval_f1_score": 0.7299293979398146,
117
+ "eval_loss": 0.46505650877952576,
118
+ "eval_precision": 0.7548552896750885,
119
+ "eval_recall": 0.7210618976649555,
120
+ "eval_runtime": 6.7028,
121
+ "eval_samples_per_second": 548.875,
122
+ "eval_steps_per_second": 8.653,
123
+ "step": 600
124
+ },
125
+ {
126
+ "epoch": 6.086956521739131,
127
+ "grad_norm": 5.458530902862549,
128
+ "learning_rate": 9.166666666666666e-06,
129
+ "loss": 0.4204,
130
+ "step": 700
131
+ },
132
+ {
133
+ "epoch": 6.086956521739131,
134
+ "eval_accuracy": 0.8869257950530035,
135
+ "eval_f1_score": 0.7654329783869755,
136
+ "eval_loss": 0.40097591280937195,
137
+ "eval_precision": 0.8914471413846636,
138
+ "eval_recall": 0.7712133932759179,
139
+ "eval_runtime": 6.7443,
140
+ "eval_samples_per_second": 545.497,
141
+ "eval_steps_per_second": 8.6,
142
+ "step": 700
143
+ },
144
+ {
145
+ "epoch": 6.956521739130435,
146
+ "grad_norm": 6.144416809082031,
147
+ "learning_rate": 8.888888888888888e-06,
148
+ "loss": 0.3421,
149
+ "step": 800
150
+ },
151
+ {
152
+ "epoch": 6.956521739130435,
153
+ "eval_accuracy": 0.9051372655612938,
154
+ "eval_f1_score": 0.8713582894968701,
155
+ "eval_loss": 0.3648029565811157,
156
+ "eval_precision": 0.8940734807154502,
157
+ "eval_recall": 0.8588405388993653,
158
+ "eval_runtime": 6.7352,
159
+ "eval_samples_per_second": 546.232,
160
+ "eval_steps_per_second": 8.611,
161
+ "step": 800
162
+ },
163
+ {
164
+ "epoch": 7.826086956521739,
165
+ "grad_norm": 9.907292366027832,
166
+ "learning_rate": 8.611111111111112e-06,
167
+ "loss": 0.2841,
168
+ "step": 900
169
+ },
170
+ {
171
+ "epoch": 7.826086956521739,
172
+ "eval_accuracy": 0.9181842892090242,
173
+ "eval_f1_score": 0.9006880118200489,
174
+ "eval_loss": 0.3239505887031555,
175
+ "eval_precision": 0.8978148514278343,
176
+ "eval_recall": 0.9038252102525616,
177
+ "eval_runtime": 6.7315,
178
+ "eval_samples_per_second": 546.532,
179
+ "eval_steps_per_second": 8.616,
180
+ "step": 900
181
+ },
182
+ {
183
+ "epoch": 8.695652173913043,
184
+ "grad_norm": 6.941843032836914,
185
+ "learning_rate": 8.333333333333334e-06,
186
+ "loss": 0.2319,
187
+ "step": 1000
188
+ },
189
+ {
190
+ "epoch": 8.695652173913043,
191
+ "eval_accuracy": 0.9203587931503125,
192
+ "eval_f1_score": 0.9060754755748909,
193
+ "eval_loss": 0.3025033473968506,
194
+ "eval_precision": 0.9175362378163865,
195
+ "eval_recall": 0.8975903509513042,
196
+ "eval_runtime": 6.6908,
197
+ "eval_samples_per_second": 549.861,
198
+ "eval_steps_per_second": 8.669,
199
+ "step": 1000
200
+ },
201
+ {
202
+ "epoch": 9.565217391304348,
203
+ "grad_norm": 4.255012035369873,
204
+ "learning_rate": 8.055555555555557e-06,
205
+ "loss": 0.205,
206
+ "step": 1100
207
+ },
208
+ {
209
+ "epoch": 9.565217391304348,
210
+ "eval_accuracy": 0.9209024191356346,
211
+ "eval_f1_score": 0.9098640550303895,
212
+ "eval_loss": 0.29862046241760254,
213
+ "eval_precision": 0.9123097696068861,
214
+ "eval_recall": 0.9086287269577242,
215
+ "eval_runtime": 6.7134,
216
+ "eval_samples_per_second": 548.01,
217
+ "eval_steps_per_second": 8.639,
218
+ "step": 1100
219
+ },
220
+ {
221
+ "epoch": 10.434782608695652,
222
+ "grad_norm": 5.848569393157959,
223
+ "learning_rate": 7.77777777777778e-06,
224
+ "loss": 0.1783,
225
+ "step": 1200
226
+ },
227
+ {
228
+ "epoch": 10.434782608695652,
229
+ "eval_accuracy": 0.9206306061429737,
230
+ "eval_f1_score": 0.9104384776037051,
231
+ "eval_loss": 0.3047122657299042,
232
+ "eval_precision": 0.9024848857165658,
233
+ "eval_recall": 0.9207396220750284,
234
+ "eval_runtime": 6.6561,
235
+ "eval_samples_per_second": 552.726,
236
+ "eval_steps_per_second": 8.714,
237
+ "step": 1200
238
+ },
239
+ {
240
+ "epoch": 11.304347826086957,
241
+ "grad_norm": 7.340043544769287,
242
+ "learning_rate": 7.500000000000001e-06,
243
+ "loss": 0.1587,
244
+ "step": 1300
245
+ },
246
+ {
247
+ "epoch": 11.304347826086957,
248
+ "eval_accuracy": 0.9296004349007883,
249
+ "eval_f1_score": 0.9202832724978299,
250
+ "eval_loss": 0.2757803201675415,
251
+ "eval_precision": 0.9233347498988893,
252
+ "eval_recall": 0.917658614989255,
253
+ "eval_runtime": 6.6787,
254
+ "eval_samples_per_second": 550.859,
255
+ "eval_steps_per_second": 8.684,
256
+ "step": 1300
257
+ },
258
+ {
259
+ "epoch": 12.173913043478262,
260
+ "grad_norm": 5.315700054168701,
261
+ "learning_rate": 7.222222222222223e-06,
262
+ "loss": 0.1286,
263
+ "step": 1400
264
+ },
265
+ {
266
+ "epoch": 12.173913043478262,
267
+ "eval_accuracy": 0.9266104919815167,
268
+ "eval_f1_score": 0.9144278995332229,
269
+ "eval_loss": 0.29267847537994385,
270
+ "eval_precision": 0.9100638576136009,
271
+ "eval_recall": 0.9198715425139269,
272
+ "eval_runtime": 6.7676,
273
+ "eval_samples_per_second": 543.619,
274
+ "eval_steps_per_second": 8.57,
275
+ "step": 1400
276
+ },
277
+ {
278
+ "epoch": 13.043478260869565,
279
+ "grad_norm": 5.173799514770508,
280
+ "learning_rate": 6.944444444444445e-06,
281
+ "loss": 0.1221,
282
+ "step": 1500
283
+ },
284
+ {
285
+ "epoch": 13.043478260869565,
286
+ "eval_accuracy": 0.9317749388420766,
287
+ "eval_f1_score": 0.9245023460604546,
288
+ "eval_loss": 0.28211963176727295,
289
+ "eval_precision": 0.9309417300478454,
290
+ "eval_recall": 0.9193579289135766,
291
+ "eval_runtime": 6.7091,
292
+ "eval_samples_per_second": 548.359,
293
+ "eval_steps_per_second": 8.645,
294
+ "step": 1500
295
+ },
296
+ {
297
+ "epoch": 13.91304347826087,
298
+ "grad_norm": 8.639619827270508,
299
+ "learning_rate": 6.666666666666667e-06,
300
+ "loss": 0.1087,
301
+ "step": 1600
302
+ },
303
+ {
304
+ "epoch": 13.91304347826087,
305
+ "eval_accuracy": 0.9293286219081273,
306
+ "eval_f1_score": 0.9159607873769989,
307
+ "eval_loss": 0.27890825271606445,
308
+ "eval_precision": 0.9090390134661626,
309
+ "eval_recall": 0.9236924050215896,
310
+ "eval_runtime": 6.7017,
311
+ "eval_samples_per_second": 548.966,
312
+ "eval_steps_per_second": 8.655,
313
+ "step": 1600
314
+ },
315
+ {
316
+ "epoch": 14.782608695652174,
317
+ "grad_norm": 6.424872398376465,
318
+ "learning_rate": 6.3888888888888885e-06,
319
+ "loss": 0.0982,
320
+ "step": 1700
321
+ },
322
+ {
323
+ "epoch": 14.782608695652174,
324
+ "eval_accuracy": 0.9290568089154662,
325
+ "eval_f1_score": 0.9196461825352048,
326
+ "eval_loss": 0.2833573520183563,
327
+ "eval_precision": 0.9187836187318232,
328
+ "eval_recall": 0.9213402050339831,
329
+ "eval_runtime": 6.7096,
330
+ "eval_samples_per_second": 548.32,
331
+ "eval_steps_per_second": 8.644,
332
+ "step": 1700
333
+ },
334
+ {
335
+ "epoch": 15.652173913043478,
336
+ "grad_norm": 4.618613243103027,
337
+ "learning_rate": 6.111111111111112e-06,
338
+ "loss": 0.089,
339
+ "step": 1800
340
+ },
341
+ {
342
+ "epoch": 15.652173913043478,
343
+ "eval_accuracy": 0.9298722478934494,
344
+ "eval_f1_score": 0.9202166850732406,
345
+ "eval_loss": 0.28280356526374817,
346
+ "eval_precision": 0.9151663252588741,
347
+ "eval_recall": 0.9260674008256092,
348
+ "eval_runtime": 6.7345,
349
+ "eval_samples_per_second": 546.292,
350
+ "eval_steps_per_second": 8.612,
351
+ "step": 1800
352
+ },
353
+ {
354
+ "epoch": 16.52173913043478,
355
+ "grad_norm": 1.9568698406219482,
356
+ "learning_rate": 5.833333333333334e-06,
357
+ "loss": 0.0795,
358
+ "step": 1900
359
+ },
360
+ {
361
+ "epoch": 16.52173913043478,
362
+ "eval_accuracy": 0.9331340038053819,
363
+ "eval_f1_score": 0.9244095368032713,
364
+ "eval_loss": 0.273701936006546,
365
+ "eval_precision": 0.925343846727414,
366
+ "eval_recall": 0.9238732382441093,
367
+ "eval_runtime": 6.7425,
368
+ "eval_samples_per_second": 545.641,
369
+ "eval_steps_per_second": 8.602,
370
+ "step": 1900
371
+ },
372
+ {
373
+ "epoch": 17.391304347826086,
374
+ "grad_norm": 2.161759614944458,
375
+ "learning_rate": 5.555555555555557e-06,
376
+ "loss": 0.0684,
377
+ "step": 2000
378
+ },
379
+ {
380
+ "epoch": 17.391304347826086,
381
+ "eval_accuracy": 0.9323185648273987,
382
+ "eval_f1_score": 0.9261695691084951,
383
+ "eval_loss": 0.2873239815235138,
384
+ "eval_precision": 0.9319834922740233,
385
+ "eval_recall": 0.9216726996777184,
386
+ "eval_runtime": 6.7577,
387
+ "eval_samples_per_second": 544.415,
388
+ "eval_steps_per_second": 8.583,
389
+ "step": 2000
390
+ },
391
+ {
392
+ "epoch": 18.26086956521739,
393
+ "grad_norm": 4.607916355133057,
394
+ "learning_rate": 5.2777777777777785e-06,
395
+ "loss": 0.0673,
396
+ "step": 2100
397
+ },
398
+ {
399
+ "epoch": 18.26086956521739,
400
+ "eval_accuracy": 0.9320467518347377,
401
+ "eval_f1_score": 0.925184613434992,
402
+ "eval_loss": 0.2904324531555176,
403
+ "eval_precision": 0.9332741752610002,
404
+ "eval_recall": 0.9184445089519725,
405
+ "eval_runtime": 6.7294,
406
+ "eval_samples_per_second": 546.702,
407
+ "eval_steps_per_second": 8.619,
408
+ "step": 2100
409
+ },
410
+ {
411
+ "epoch": 19.130434782608695,
412
+ "grad_norm": 6.327251434326172,
413
+ "learning_rate": 5e-06,
414
+ "loss": 0.0571,
415
+ "step": 2200
416
+ },
417
+ {
418
+ "epoch": 19.130434782608695,
419
+ "eval_accuracy": 0.9293286219081273,
420
+ "eval_f1_score": 0.9221668516434853,
421
+ "eval_loss": 0.3166205883026123,
422
+ "eval_precision": 0.925137476734381,
423
+ "eval_recall": 0.920952609526737,
424
+ "eval_runtime": 6.7037,
425
+ "eval_samples_per_second": 548.799,
426
+ "eval_steps_per_second": 8.652,
427
+ "step": 2200
428
+ },
429
+ {
430
+ "epoch": 20.0,
431
+ "grad_norm": 9.082805633544922,
432
+ "learning_rate": 4.722222222222222e-06,
433
+ "loss": 0.0561,
434
+ "step": 2300
435
+ },
436
+ {
437
+ "epoch": 20.0,
438
+ "eval_accuracy": 0.9317749388420766,
439
+ "eval_f1_score": 0.9221280725480369,
440
+ "eval_loss": 0.2922111749649048,
441
+ "eval_precision": 0.9150274852978553,
442
+ "eval_recall": 0.9297539237688469,
443
+ "eval_runtime": 6.7503,
444
+ "eval_samples_per_second": 545.017,
445
+ "eval_steps_per_second": 8.592,
446
+ "step": 2300
447
+ },
448
+ {
449
+ "epoch": 20.869565217391305,
450
+ "grad_norm": 5.283856391906738,
451
+ "learning_rate": 4.444444444444444e-06,
452
+ "loss": 0.0511,
453
+ "step": 2400
454
+ },
455
+ {
456
+ "epoch": 20.869565217391305,
457
+ "eval_accuracy": 0.9315031258494156,
458
+ "eval_f1_score": 0.9190655007648246,
459
+ "eval_loss": 0.29927295446395874,
460
+ "eval_precision": 0.9088064828335735,
461
+ "eval_recall": 0.9303236730969998,
462
+ "eval_runtime": 6.7281,
463
+ "eval_samples_per_second": 546.809,
464
+ "eval_steps_per_second": 8.621,
465
+ "step": 2400
466
+ },
467
+ {
468
+ "epoch": 21.73913043478261,
469
+ "grad_norm": 6.0074896812438965,
470
+ "learning_rate": 4.166666666666667e-06,
471
+ "loss": 0.0442,
472
+ "step": 2500
473
+ },
474
+ {
475
+ "epoch": 21.73913043478261,
476
+ "eval_accuracy": 0.9266104919815167,
477
+ "eval_f1_score": 0.9161795338292905,
478
+ "eval_loss": 0.32011494040489197,
479
+ "eval_precision": 0.9060451440252857,
480
+ "eval_recall": 0.9280493422296427,
481
+ "eval_runtime": 6.7127,
482
+ "eval_samples_per_second": 548.067,
483
+ "eval_steps_per_second": 8.64,
484
+ "step": 2500
485
+ },
486
+ {
487
+ "epoch": 22.608695652173914,
488
+ "grad_norm": 3.1078407764434814,
489
+ "learning_rate": 3.88888888888889e-06,
490
+ "loss": 0.0447,
491
+ "step": 2600
492
+ },
493
+ {
494
+ "epoch": 22.608695652173914,
495
+ "eval_accuracy": 0.928241369937483,
496
+ "eval_f1_score": 0.9137497551284842,
497
+ "eval_loss": 0.3155056834220886,
498
+ "eval_precision": 0.9009580466238951,
499
+ "eval_recall": 0.9281730038314259,
500
+ "eval_runtime": 6.7337,
501
+ "eval_samples_per_second": 546.354,
502
+ "eval_steps_per_second": 8.613,
503
+ "step": 2600
504
+ },
505
+ {
506
+ "epoch": 23.47826086956522,
507
+ "grad_norm": 2.9584195613861084,
508
+ "learning_rate": 3.6111111111111115e-06,
509
+ "loss": 0.0415,
510
+ "step": 2700
511
+ },
512
+ {
513
+ "epoch": 23.47826086956522,
514
+ "eval_accuracy": 0.9334058167980429,
515
+ "eval_f1_score": 0.9226018260362496,
516
+ "eval_loss": 0.30177852511405945,
517
+ "eval_precision": 0.9185179495480513,
518
+ "eval_recall": 0.9269833265460256,
519
+ "eval_runtime": 6.7411,
520
+ "eval_samples_per_second": 545.757,
521
+ "eval_steps_per_second": 8.604,
522
+ "step": 2700
523
+ },
524
+ {
525
+ "epoch": 24.347826086956523,
526
+ "grad_norm": 12.190321922302246,
527
+ "learning_rate": 3.3333333333333333e-06,
528
+ "loss": 0.0359,
529
+ "step": 2800
530
+ },
531
+ {
532
+ "epoch": 24.347826086956523,
533
+ "eval_accuracy": 0.9298722478934494,
534
+ "eval_f1_score": 0.9177278989948806,
535
+ "eval_loss": 0.31918126344680786,
536
+ "eval_precision": 0.9062664068560837,
537
+ "eval_recall": 0.9308234663752396,
538
+ "eval_runtime": 6.7802,
539
+ "eval_samples_per_second": 542.606,
540
+ "eval_steps_per_second": 8.554,
541
+ "step": 2800
542
+ },
543
+ {
544
+ "epoch": 25.217391304347824,
545
+ "grad_norm": 0.2598835527896881,
546
+ "learning_rate": 3.055555555555556e-06,
547
+ "loss": 0.0369,
548
+ "step": 2900
549
+ },
550
+ {
551
+ "epoch": 25.217391304347824,
552
+ "eval_accuracy": 0.933677629790704,
553
+ "eval_f1_score": 0.9210521238209074,
554
+ "eval_loss": 0.3063570559024811,
555
+ "eval_precision": 0.9140578271273506,
556
+ "eval_recall": 0.9285610502121662,
557
+ "eval_runtime": 6.7729,
558
+ "eval_samples_per_second": 543.197,
559
+ "eval_steps_per_second": 8.564,
560
+ "step": 2900
561
+ },
562
+ {
563
+ "epoch": 26.08695652173913,
564
+ "grad_norm": 0.24433408677577972,
565
+ "learning_rate": 2.7777777777777783e-06,
566
+ "loss": 0.0296,
567
+ "step": 3000
568
+ },
569
+ {
570
+ "epoch": 26.08695652173913,
571
+ "eval_accuracy": 0.9328621908127208,
572
+ "eval_f1_score": 0.9237047805131925,
573
+ "eval_loss": 0.311038613319397,
574
+ "eval_precision": 0.9198460229141495,
575
+ "eval_recall": 0.9279424126946928,
576
+ "eval_runtime": 6.8161,
577
+ "eval_samples_per_second": 539.754,
578
+ "eval_steps_per_second": 8.509,
579
+ "step": 3000
580
+ },
581
+ {
582
+ "epoch": 26.08695652173913,
583
+ "step": 3000,
584
+ "total_flos": 6579999363349350.0,
585
+ "train_loss": 0.31640464369455973,
586
+ "train_runtime": 3238.6183,
587
+ "train_samples_per_second": 158.092,
588
+ "train_steps_per_second": 1.235
589
+ }
590
+ ],
591
+ "logging_steps": 100,
592
+ "max_steps": 4000,
593
+ "num_input_tokens_seen": 0,
594
+ "num_train_epochs": 35,
595
+ "save_steps": 100,
596
+ "total_flos": 6579999363349350.0,
597
+ "train_batch_size": 64,
598
+ "trial_name": null,
599
+ "trial_params": null
600
+ }