apwic commited on
Commit
bb89480
1 Parent(s): befe9f5

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9090009891196835,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8972431077694235,
5
- "eval_f1": 0.8763538792940554,
6
- "eval_loss": 0.30106595158576965,
7
- "eval_precision": 0.8754297605404427,
8
- "eval_recall": 0.877295871976723,
9
- "eval_runtime": 5.0202,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 79.479,
12
- "eval_steps_per_second": 9.96,
13
- "f1": 0.8914010031851596,
14
- "precision": 0.8885134195522181,
15
- "recall": 0.8944670877377938,
16
- "train_loss": 0.24252970335913487,
17
- "train_runtime": 1919.2772,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 37.91,
20
- "train_steps_per_second": 1.271
21
  }
 
1
  {
2
+ "accuracy": 0.9060336300692384,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8847117794486216,
5
+ "eval_f1": 0.8609292598654301,
6
+ "eval_loss": 0.3090469241142273,
7
+ "eval_precision": 0.8609292598654301,
8
+ "eval_recall": 0.8609292598654301,
9
+ "eval_runtime": 1.8139,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 219.963,
12
+ "eval_steps_per_second": 27.564,
13
+ "f1": 0.8885945244345052,
14
+ "precision": 0.8834872799509323,
15
+ "recall": 0.8943164810753316,
16
+ "train_loss": 0.242632052937492,
17
+ "train_runtime": 627.3825,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 115.974,
20
+ "train_steps_per_second": 3.889
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8972431077694235,
4
- "eval_f1": 0.8763538792940554,
5
- "eval_loss": 0.30106595158576965,
6
- "eval_precision": 0.8754297605404427,
7
- "eval_recall": 0.877295871976723,
8
- "eval_runtime": 5.0202,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 79.479,
11
- "eval_steps_per_second": 9.96
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8847117794486216,
4
+ "eval_f1": 0.8609292598654301,
5
+ "eval_loss": 0.3090469241142273,
6
+ "eval_precision": 0.8609292598654301,
7
+ "eval_recall": 0.8609292598654301,
8
+ "eval_runtime": 1.8139,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 219.963,
11
+ "eval_steps_per_second": 27.564
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9090009891196835,
3
- "f1": 0.8914010031851596,
4
- "precision": 0.8885134195522181,
5
- "recall": 0.8944670877377938
6
  }
 
1
  {
2
+ "accuracy": 0.9060336300692384,
3
+ "f1": 0.8885945244345052,
4
+ "precision": 0.8834872799509323,
5
+ "recall": 0.8943164810753316
6
  }
predict_results.txt CHANGED
@@ -21,7 +21,7 @@ index prediction
21
  19 1
22
  20 1
23
  21 1
24
- 22 0
25
  23 1
26
  24 1
27
  25 0
@@ -35,7 +35,7 @@ index prediction
35
  33 1
36
  34 1
37
  35 1
38
- 36 0
39
  37 1
40
  38 1
41
  39 1
@@ -44,17 +44,17 @@ index prediction
44
  42 1
45
  43 1
46
  44 1
47
- 45 1
48
  46 1
49
  47 1
50
  48 1
51
  49 1
52
  50 1
53
- 51 1
54
  52 1
55
  53 1
56
  54 1
57
- 55 1
58
  56 1
59
  57 1
60
  58 1
@@ -67,7 +67,7 @@ index prediction
67
  65 1
68
  66 1
69
  67 0
70
- 68 1
71
  69 1
72
  70 1
73
  71 1
@@ -81,7 +81,7 @@ index prediction
81
  79 1
82
  80 1
83
  81 1
84
- 82 1
85
  83 0
86
  84 1
87
  85 1
@@ -101,12 +101,12 @@ index prediction
101
  99 1
102
  100 1
103
  101 1
104
- 102 0
105
  103 1
106
  104 1
107
- 105 0
108
  106 1
109
- 107 1
110
  108 1
111
  109 1
112
  110 1
@@ -120,7 +120,7 @@ index prediction
120
  118 1
121
  119 1
122
  120 0
123
- 121 1
124
  122 1
125
  123 1
126
  124 0
@@ -151,7 +151,7 @@ index prediction
151
  149 1
152
  150 1
153
  151 0
154
- 152 0
155
  153 1
156
  154 1
157
  155 1
@@ -171,16 +171,16 @@ index prediction
171
  169 1
172
  170 0
173
  171 1
174
- 172 0
175
  173 1
176
  174 0
177
  175 1
178
  176 1
179
  177 1
180
  178 1
181
- 179 0
182
  180 1
183
- 181 1
184
  182 1
185
  183 1
186
  184 1
@@ -223,7 +223,7 @@ index prediction
223
  221 1
224
  222 1
225
  223 1
226
- 224 0
227
  225 1
228
  226 1
229
  227 1
@@ -238,7 +238,7 @@ index prediction
238
  236 1
239
  237 0
240
  238 1
241
- 239 0
242
  240 1
243
  241 1
244
  242 1
@@ -247,7 +247,7 @@ index prediction
247
  245 1
248
  246 1
249
  247 1
250
- 248 0
251
  249 1
252
  250 1
253
  251 1
@@ -259,10 +259,10 @@ index prediction
259
  257 1
260
  258 1
261
  259 1
262
- 260 0
263
  261 1
264
  262 1
265
- 263 1
266
  264 1
267
  265 1
268
  266 1
@@ -299,9 +299,9 @@ index prediction
299
  297 1
300
  298 0
301
  299 0
302
- 300 0
303
  301 0
304
- 302 1
305
  303 0
306
  304 0
307
  305 0
@@ -322,13 +322,13 @@ index prediction
322
  320 0
323
  321 0
324
  322 0
325
- 323 1
326
  324 1
327
  325 0
328
  326 0
329
  327 0
330
  328 0
331
- 329 0
332
  330 0
333
  331 0
334
  332 0
@@ -341,12 +341,12 @@ index prediction
341
  339 0
342
  340 1
343
  341 0
344
- 342 0
345
  343 0
346
  344 0
347
  345 0
348
  346 0
349
- 347 0
350
  348 0
351
  349 0
352
  350 0
@@ -381,7 +381,7 @@ index prediction
381
  379 0
382
  380 0
383
  381 0
384
- 382 1
385
  383 1
386
  384 0
387
  385 0
@@ -434,7 +434,7 @@ index prediction
434
  432 0
435
  433 0
436
  434 1
437
- 435 1
438
  436 0
439
  437 0
440
  438 0
@@ -448,15 +448,15 @@ index prediction
448
  446 0
449
  447 0
450
  448 0
451
- 449 0
452
  450 0
453
  451 0
454
  452 0
455
  453 0
456
  454 0
457
- 455 0
458
  456 0
459
- 457 0
460
  458 0
461
  459 0
462
  460 0
@@ -486,7 +486,7 @@ index prediction
486
  484 0
487
  485 0
488
  486 0
489
- 487 0
490
  488 0
491
  489 0
492
  490 0
@@ -497,12 +497,12 @@ index prediction
497
  495 0
498
  496 0
499
  497 0
500
- 498 1
501
  499 0
502
  500 0
503
  501 0
504
  502 0
505
- 503 1
506
  504 0
507
  505 0
508
  506 0
@@ -540,7 +540,7 @@ index prediction
540
  538 0
541
  539 0
542
  540 0
543
- 541 1
544
  542 0
545
  543 0
546
  544 0
@@ -580,7 +580,7 @@ index prediction
580
  578 0
581
  579 1
582
  580 0
583
- 581 0
584
  582 1
585
  583 0
586
  584 0
@@ -591,7 +591,7 @@ index prediction
591
  589 0
592
  590 0
593
  591 1
594
- 592 1
595
  593 0
596
  594 0
597
  595 0
@@ -620,7 +620,7 @@ index prediction
620
  618 0
621
  619 0
622
  620 0
623
- 621 0
624
  622 0
625
  623 0
626
  624 0
@@ -678,7 +678,7 @@ index prediction
678
  676 0
679
  677 0
680
  678 0
681
- 679 0
682
  680 0
683
  681 0
684
  682 0
@@ -711,7 +711,7 @@ index prediction
711
  709 0
712
  710 1
713
  711 0
714
- 712 0
715
  713 0
716
  714 0
717
  715 1
@@ -756,11 +756,11 @@ index prediction
756
  754 0
757
  755 0
758
  756 0
759
- 757 0
760
  758 0
761
  759 0
762
  760 0
763
- 761 0
764
  762 1
765
  763 0
766
  764 0
@@ -811,7 +811,7 @@ index prediction
811
  809 0
812
  810 0
813
  811 0
814
- 812 0
815
  813 0
816
  814 0
817
  815 0
@@ -838,20 +838,20 @@ index prediction
838
  836 0
839
  837 1
840
  838 0
841
- 839 0
842
  840 0
843
  841 0
844
  842 0
845
  843 0
846
- 844 1
847
  845 0
848
- 846 1
849
  847 0
850
  848 0
851
  849 0
852
  850 0
853
- 851 1
854
- 852 0
855
  853 0
856
  854 1
857
  855 0
@@ -864,7 +864,7 @@ index prediction
864
  862 0
865
  863 0
866
  864 0
867
- 865 1
868
  866 0
869
  867 0
870
  868 0
@@ -899,7 +899,7 @@ index prediction
899
  897 0
900
  898 0
901
  899 0
902
- 900 1
903
  901 0
904
  902 0
905
  903 0
@@ -953,7 +953,7 @@ index prediction
953
  951 0
954
  952 0
955
  953 0
956
- 954 0
957
  955 0
958
  956 0
959
  957 0
 
21
  19 1
22
  20 1
23
  21 1
24
+ 22 1
25
  23 1
26
  24 1
27
  25 0
 
35
  33 1
36
  34 1
37
  35 1
38
+ 36 1
39
  37 1
40
  38 1
41
  39 1
 
44
  42 1
45
  43 1
46
  44 1
47
+ 45 0
48
  46 1
49
  47 1
50
  48 1
51
  49 1
52
  50 1
53
+ 51 0
54
  52 1
55
  53 1
56
  54 1
57
+ 55 0
58
  56 1
59
  57 1
60
  58 1
 
67
  65 1
68
  66 1
69
  67 0
70
+ 68 0
71
  69 1
72
  70 1
73
  71 1
 
81
  79 1
82
  80 1
83
  81 1
84
+ 82 0
85
  83 0
86
  84 1
87
  85 1
 
101
  99 1
102
  100 1
103
  101 1
104
+ 102 1
105
  103 1
106
  104 1
107
+ 105 1
108
  106 1
109
+ 107 0
110
  108 1
111
  109 1
112
  110 1
 
120
  118 1
121
  119 1
122
  120 0
123
+ 121 0
124
  122 1
125
  123 1
126
  124 0
 
151
  149 1
152
  150 1
153
  151 0
154
+ 152 1
155
  153 1
156
  154 1
157
  155 1
 
171
  169 1
172
  170 0
173
  171 1
174
+ 172 1
175
  173 1
176
  174 0
177
  175 1
178
  176 1
179
  177 1
180
  178 1
181
+ 179 1
182
  180 1
183
+ 181 0
184
  182 1
185
  183 1
186
  184 1
 
223
  221 1
224
  222 1
225
  223 1
226
+ 224 1
227
  225 1
228
  226 1
229
  227 1
 
238
  236 1
239
  237 0
240
  238 1
241
+ 239 1
242
  240 1
243
  241 1
244
  242 1
 
247
  245 1
248
  246 1
249
  247 1
250
+ 248 1
251
  249 1
252
  250 1
253
  251 1
 
259
  257 1
260
  258 1
261
  259 1
262
+ 260 1
263
  261 1
264
  262 1
265
+ 263 0
266
  264 1
267
  265 1
268
  266 1
 
299
  297 1
300
  298 0
301
  299 0
302
+ 300 1
303
  301 0
304
+ 302 0
305
  303 0
306
  304 0
307
  305 0
 
322
  320 0
323
  321 0
324
  322 0
325
+ 323 0
326
  324 1
327
  325 0
328
  326 0
329
  327 0
330
  328 0
331
+ 329 1
332
  330 0
333
  331 0
334
  332 0
 
341
  339 0
342
  340 1
343
  341 0
344
+ 342 1
345
  343 0
346
  344 0
347
  345 0
348
  346 0
349
+ 347 1
350
  348 0
351
  349 0
352
  350 0
 
381
  379 0
382
  380 0
383
  381 0
384
+ 382 0
385
  383 1
386
  384 0
387
  385 0
 
434
  432 0
435
  433 0
436
  434 1
437
+ 435 0
438
  436 0
439
  437 0
440
  438 0
 
448
  446 0
449
  447 0
450
  448 0
451
+ 449 1
452
  450 0
453
  451 0
454
  452 0
455
  453 0
456
  454 0
457
+ 455 1
458
  456 0
459
+ 457 1
460
  458 0
461
  459 0
462
  460 0
 
486
  484 0
487
  485 0
488
  486 0
489
+ 487 1
490
  488 0
491
  489 0
492
  490 0
 
497
  495 0
498
  496 0
499
  497 0
500
+ 498 0
501
  499 0
502
  500 0
503
  501 0
504
  502 0
505
+ 503 0
506
  504 0
507
  505 0
508
  506 0
 
540
  538 0
541
  539 0
542
  540 0
543
+ 541 0
544
  542 0
545
  543 0
546
  544 0
 
580
  578 0
581
  579 1
582
  580 0
583
+ 581 1
584
  582 1
585
  583 0
586
  584 0
 
591
  589 0
592
  590 0
593
  591 1
594
+ 592 0
595
  593 0
596
  594 0
597
  595 0
 
620
  618 0
621
  619 0
622
  620 0
623
+ 621 1
624
  622 0
625
  623 0
626
  624 0
 
678
  676 0
679
  677 0
680
  678 0
681
+ 679 1
682
  680 0
683
  681 0
684
  682 0
 
711
  709 0
712
  710 1
713
  711 0
714
+ 712 1
715
  713 0
716
  714 0
717
  715 1
 
756
  754 0
757
  755 0
758
  756 0
759
+ 757 1
760
  758 0
761
  759 0
762
  760 0
763
+ 761 1
764
  762 1
765
  763 0
766
  764 0
 
811
  809 0
812
  810 0
813
  811 0
814
+ 812 1
815
  813 0
816
  814 0
817
  815 0
 
838
  836 0
839
  837 1
840
  838 0
841
+ 839 1
842
  840 0
843
  841 0
844
  842 0
845
  843 0
846
+ 844 0
847
  845 0
848
+ 846 0
849
  847 0
850
  848 0
851
  849 0
852
  850 0
853
+ 851 0
854
+ 852 1
855
  853 0
856
  854 1
857
  855 0
 
864
  862 0
865
  863 0
866
  864 0
867
+ 865 0
868
  866 0
869
  867 0
870
  868 0
 
899
  897 0
900
  898 0
901
  899 0
902
+ 900 0
903
  901 0
904
  902 0
905
  903 0
 
953
  951 0
954
  952 0
955
  953 0
956
+ 954 1
957
  955 0
958
  956 0
959
  957 0
runs/Jun03_13-22-10_a358b85c7679/events.out.tfevents.1717421575.a358b85c7679.105407.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fea77e7b0238bbebb10f355daa390b96c3c8f1d50c8df03db0581d0f9f01244
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.24252970335913487,
4
- "train_runtime": 1919.2772,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 37.91,
7
- "train_steps_per_second": 1.271
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.242632052937492,
4
+ "train_runtime": 627.3825,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 115.974,
7
+ "train_steps_per_second": 3.889
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.3284077644348145,
14
  "learning_rate": 4.75e-05,
15
  "loss": 0.5535,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7268170426065163,
21
- "eval_f1": 0.6326680574676724,
22
- "eval_loss": 0.5077849626541138,
23
- "eval_precision": 0.6605799373040753,
24
- "eval_recall": 0.6242044008001455,
25
- "eval_runtime": 5.0717,
26
- "eval_samples_per_second": 78.673,
27
- "eval_steps_per_second": 9.859,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.732577085494995,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4682,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8170426065162907,
40
- "eval_f1": 0.777617444284111,
41
- "eval_loss": 0.4184626042842865,
42
- "eval_precision": 0.7798245614035089,
43
- "eval_recall": 0.7755501000181851,
44
- "eval_runtime": 5.0889,
45
- "eval_samples_per_second": 78.406,
46
- "eval_steps_per_second": 9.825,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 3.4716732501983643,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3849,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8170426065162907,
59
- "eval_f1": 0.757268931723293,
60
- "eval_loss": 0.38087406754493713,
61
- "eval_precision": 0.7968253968253969,
62
- "eval_recall": 0.7380432805964721,
63
- "eval_runtime": 5.1872,
64
- "eval_samples_per_second": 76.92,
65
- "eval_steps_per_second": 9.639,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.9364979267120361,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3127,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8571428571428571,
78
- "eval_f1": 0.8289446964056049,
79
- "eval_loss": 0.32795679569244385,
80
- "eval_precision": 0.8266129032258065,
81
- "eval_recall": 0.8314238952536825,
82
- "eval_runtime": 5.0897,
83
- "eval_samples_per_second": 78.394,
84
- "eval_steps_per_second": 9.824,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.656118392944336,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2869,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8621553884711779,
97
- "eval_f1": 0.8341332527115377,
98
- "eval_loss": 0.3168599307537079,
99
- "eval_precision": 0.8333132275770553,
100
- "eval_recall": 0.8349699945444626,
101
- "eval_runtime": 5.064,
102
- "eval_samples_per_second": 78.791,
103
- "eval_steps_per_second": 9.874,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 2.3921332359313965,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.274,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8771929824561403,
116
- "eval_f1": 0.8576006759069754,
117
- "eval_loss": 0.3217551112174988,
118
- "eval_precision": 0.8466769923965081,
119
- "eval_recall": 0.8731132933260592,
120
- "eval_runtime": 5.0567,
121
- "eval_samples_per_second": 78.906,
122
- "eval_steps_per_second": 9.888,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 1.9414736032485962,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2539,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8671679197994987,
135
- "eval_f1": 0.8417152566223307,
136
- "eval_loss": 0.30381932854652405,
137
- "eval_precision": 0.8378262413446174,
138
- "eval_recall": 0.8460174577195854,
139
- "eval_runtime": 5.0791,
140
- "eval_samples_per_second": 78.557,
141
- "eval_steps_per_second": 9.844,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 2.456129789352417,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2286,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8671679197994987,
154
- "eval_f1": 0.8341632880321839,
155
- "eval_loss": 0.32023322582244873,
156
- "eval_precision": 0.8479139504563233,
157
- "eval_recall": 0.8235133660665576,
158
- "eval_runtime": 5.2736,
159
- "eval_samples_per_second": 75.66,
160
- "eval_steps_per_second": 9.481,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 6.962612152099609,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2249,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8872180451127819,
173
- "eval_f1": 0.8662440310793597,
174
- "eval_loss": 0.2973268926143646,
175
- "eval_precision": 0.8606158357771261,
176
- "eval_recall": 0.872704128023277,
177
- "eval_runtime": 5.05,
178
- "eval_samples_per_second": 79.009,
179
- "eval_steps_per_second": 9.901,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 3.1556286811828613,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.2083,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8721804511278195,
192
- "eval_f1": 0.8376560692488731,
193
- "eval_loss": 0.3127811551094055,
194
- "eval_precision": 0.8602278120550546,
195
- "eval_recall": 0.8220585561011093,
196
- "eval_runtime": 5.0596,
197
- "eval_samples_per_second": 78.86,
198
- "eval_steps_per_second": 9.882,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.8982645273208618,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1935,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8922305764411027,
211
- "eval_f1": 0.8721887408091659,
212
- "eval_loss": 0.2957092523574829,
213
- "eval_precision": 0.866466275659824,
214
- "eval_recall": 0.8787506819421713,
215
- "eval_runtime": 5.0454,
216
- "eval_samples_per_second": 79.082,
217
- "eval_steps_per_second": 9.91,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 6.364419460296631,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1859,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8822055137844611,
230
- "eval_f1": 0.8602993213495533,
231
- "eval_loss": 0.2869341969490051,
232
- "eval_precision": 0.8547653958944281,
233
- "eval_recall": 0.8666575741043827,
234
- "eval_runtime": 5.0666,
235
- "eval_samples_per_second": 78.751,
236
- "eval_steps_per_second": 9.869,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 1.9051835536956787,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1735,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.8796992481203008,
249
- "eval_f1": 0.8502252252252251,
250
- "eval_loss": 0.30611610412597656,
251
- "eval_precision": 0.863265306122449,
252
- "eval_recall": 0.8398799781778505,
253
- "eval_runtime": 5.1317,
254
- "eval_samples_per_second": 77.752,
255
- "eval_steps_per_second": 9.743,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 6.753292083740234,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1804,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8897243107769424,
268
- "eval_f1": 0.8695225637671682,
269
- "eval_loss": 0.29550090432167053,
270
- "eval_precision": 0.8631532846715328,
271
- "eval_recall": 0.8769776322967813,
272
- "eval_runtime": 5.0486,
273
- "eval_samples_per_second": 79.032,
274
- "eval_steps_per_second": 9.904,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.1921367347240448,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1628,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8972431077694235,
287
- "eval_f1": 0.8757339815412664,
288
- "eval_loss": 0.2972831130027771,
289
- "eval_precision": 0.8766906299500427,
290
- "eval_recall": 0.8747954173486088,
291
- "eval_runtime": 5.0606,
292
- "eval_samples_per_second": 78.844,
293
- "eval_steps_per_second": 9.88,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.38073158264160156,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1619,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8897243107769424,
306
- "eval_f1": 0.8707140332272888,
307
- "eval_loss": 0.3023494482040405,
308
- "eval_precision": 0.8618432385874246,
309
- "eval_recall": 0.8819785415530097,
310
- "eval_runtime": 5.0599,
311
- "eval_samples_per_second": 78.856,
312
- "eval_steps_per_second": 9.882,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 1.6226332187652588,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1514,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8972431077694235,
325
- "eval_f1": 0.8775533117267087,
326
- "eval_loss": 0.2997310757637024,
327
- "eval_precision": 0.873246730188791,
328
- "eval_recall": 0.8822967812329514,
329
- "eval_runtime": 5.0625,
330
- "eval_samples_per_second": 78.815,
331
- "eval_steps_per_second": 9.877,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 4.088443756103516,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1503,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8947368421052632,
344
- "eval_f1": 0.8736504011098378,
345
- "eval_loss": 0.3002457618713379,
346
- "eval_precision": 0.8718487394957983,
347
- "eval_recall": 0.8755228223313329,
348
- "eval_runtime": 5.0624,
349
- "eval_samples_per_second": 78.817,
350
- "eval_steps_per_second": 9.877,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 6.321498394012451,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.154,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8947368421052632,
363
- "eval_f1": 0.8730223677032187,
364
- "eval_loss": 0.303114652633667,
365
- "eval_precision": 0.8730223677032187,
366
- "eval_recall": 0.8730223677032187,
367
- "eval_runtime": 5.2549,
368
- "eval_samples_per_second": 75.929,
369
- "eval_steps_per_second": 9.515,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 1.89798903465271,
375
  "learning_rate": 0.0,
376
- "loss": 0.1408,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8972431077694235,
382
- "eval_f1": 0.8763538792940554,
383
- "eval_loss": 0.30106595158576965,
384
- "eval_precision": 0.8754297605404427,
385
- "eval_recall": 0.877295871976723,
386
- "eval_runtime": 5.0597,
387
- "eval_samples_per_second": 78.858,
388
- "eval_steps_per_second": 9.882,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.24252970335913487,
396
- "train_runtime": 1919.2772,
397
- "train_samples_per_second": 37.91,
398
- "train_steps_per_second": 1.271
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.5641188621521,
14
  "learning_rate": 4.75e-05,
15
  "loss": 0.5535,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7243107769423559,
21
+ "eval_f1": 0.6401043033324587,
22
+ "eval_loss": 0.5041041374206543,
23
+ "eval_precision": 0.6583725987676694,
24
+ "eval_recall": 0.6324331696672122,
25
+ "eval_runtime": 1.8214,
26
+ "eval_samples_per_second": 219.06,
27
+ "eval_steps_per_second": 27.451,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.286947011947632,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4636,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7669172932330827,
40
+ "eval_f1": 0.7331142070096449,
41
+ "eval_loss": 0.4692240059375763,
42
+ "eval_precision": 0.7252895752895754,
43
+ "eval_recall": 0.7475904709947263,
44
+ "eval_runtime": 1.8328,
45
+ "eval_samples_per_second": 217.703,
46
+ "eval_steps_per_second": 27.281,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.800495147705078,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.4023,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8370927318295739,
59
+ "eval_f1": 0.7809488416091623,
60
+ "eval_loss": 0.36048197746276855,
61
+ "eval_precision": 0.832562695924765,
62
+ "eval_recall": 0.7572285870158211,
63
+ "eval_runtime": 1.8261,
64
+ "eval_samples_per_second": 218.495,
65
+ "eval_steps_per_second": 27.38,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 4.0396647453308105,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.3202,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8546365914786967,
78
+ "eval_f1": 0.8159125620465827,
79
+ "eval_loss": 0.3256481885910034,
80
+ "eval_precision": 0.8356565656565657,
81
+ "eval_recall": 0.8021458446990362,
82
+ "eval_runtime": 1.8317,
83
+ "eval_samples_per_second": 217.825,
84
+ "eval_steps_per_second": 27.296,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 1.8746237754821777,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2919,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8771929824561403,
97
+ "eval_f1": 0.8475258334958082,
98
+ "eval_loss": 0.3067488968372345,
99
+ "eval_precision": 0.8591828192414193,
100
+ "eval_recall": 0.8381069285324605,
101
+ "eval_runtime": 1.8315,
102
+ "eval_samples_per_second": 217.857,
103
+ "eval_steps_per_second": 27.3,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 3.942033290863037,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2657,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8521303258145363,
116
+ "eval_f1": 0.8320383569853806,
117
+ "eval_loss": 0.3400041460990906,
118
+ "eval_precision": 0.8193218954248366,
119
+ "eval_recall": 0.8553827968721586,
120
+ "eval_runtime": 1.8306,
121
+ "eval_samples_per_second": 217.958,
122
+ "eval_steps_per_second": 27.313,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.39800548553466797,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2559,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.87468671679198,
135
+ "eval_f1": 0.8524146298159436,
136
+ "eval_loss": 0.2993007302284241,
137
+ "eval_precision": 0.8451250578971746,
138
+ "eval_recall": 0.8613384251682124,
139
+ "eval_runtime": 1.8316,
140
+ "eval_samples_per_second": 217.847,
141
+ "eval_steps_per_second": 27.299,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 7.434815406799316,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2369,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8872180451127819,
154
+ "eval_f1": 0.8584001703456596,
155
+ "eval_loss": 0.30177775025367737,
156
+ "eval_precision": 0.8759655377302435,
157
+ "eval_recall": 0.8451991271140207,
158
+ "eval_runtime": 1.8332,
159
+ "eval_samples_per_second": 217.658,
160
+ "eval_steps_per_second": 27.275,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 6.123136043548584,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2178,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8847117794486216,
173
+ "eval_f1": 0.8595070422535211,
174
+ "eval_loss": 0.29259544610977173,
175
+ "eval_precision": 0.8633733523114054,
176
+ "eval_recall": 0.8559283506092017,
177
+ "eval_runtime": 1.8408,
178
+ "eval_samples_per_second": 216.759,
179
+ "eval_steps_per_second": 27.163,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 3.8655648231506348,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2118,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8872180451127819,
192
+ "eval_f1": 0.8622036668943447,
193
+ "eval_loss": 0.29553094506263733,
194
+ "eval_precision": 0.8671602787456446,
195
+ "eval_recall": 0.8577014002545917,
196
+ "eval_runtime": 1.8333,
197
+ "eval_samples_per_second": 217.636,
198
+ "eval_steps_per_second": 27.273,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 3.4139134883880615,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.2034,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8847117794486216,
211
+ "eval_f1": 0.8572517421602788,
212
+ "eval_loss": 0.2934134602546692,
213
+ "eval_precision": 0.8679426449878376,
214
+ "eval_recall": 0.8484269867248591,
215
+ "eval_runtime": 1.8401,
216
+ "eval_samples_per_second": 216.837,
217
+ "eval_steps_per_second": 27.173,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 5.207653045654297,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1856,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8796992481203008,
230
+ "eval_f1": 0.8533986527862829,
231
+ "eval_loss": 0.297758549451828,
232
+ "eval_precision": 0.8572003218020917,
233
+ "eval_recall": 0.8498817966903074,
234
+ "eval_runtime": 1.8319,
235
+ "eval_samples_per_second": 217.802,
236
+ "eval_steps_per_second": 27.293,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.1223020553588867,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1775,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.8796992481203008,
249
+ "eval_f1": 0.8493865995596099,
250
+ "eval_loss": 0.3038978576660156,
251
+ "eval_precision": 0.8651108632904749,
252
+ "eval_recall": 0.8373795235497363,
253
+ "eval_runtime": 1.8374,
254
+ "eval_samples_per_second": 217.149,
255
+ "eval_steps_per_second": 27.212,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 7.688318252563477,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1719,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8872180451127819,
268
+ "eval_f1": 0.8622036668943447,
269
+ "eval_loss": 0.3036334812641144,
270
+ "eval_precision": 0.8671602787456446,
271
+ "eval_recall": 0.8577014002545917,
272
+ "eval_runtime": 1.8354,
273
+ "eval_samples_per_second": 217.392,
274
+ "eval_steps_per_second": 27.242,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 1.8339260816574097,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1621,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8822055137844611,
287
+ "eval_f1": 0.8596342841745197,
288
+ "eval_loss": 0.299029141664505,
289
+ "eval_precision": 0.8555364857667042,
290
+ "eval_recall": 0.8641571194762684,
291
+ "eval_runtime": 1.8346,
292
+ "eval_samples_per_second": 217.487,
293
+ "eval_steps_per_second": 27.254,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 1.4192665815353394,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1535,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8847117794486216,
306
+ "eval_f1": 0.8616171059774413,
307
+ "eval_loss": 0.3039585053920746,
308
+ "eval_precision": 0.859873949579832,
309
+ "eval_recall": 0.8634297144935443,
310
+ "eval_runtime": 1.8339,
311
+ "eval_samples_per_second": 217.57,
312
+ "eval_steps_per_second": 27.264,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 1.2236837148666382,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1504,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8796992481203008,
325
+ "eval_f1": 0.8510452961672474,
326
+ "eval_loss": 0.31895172595977783,
327
+ "eval_precision": 0.8615574190244527,
328
+ "eval_recall": 0.8423804328059648,
329
+ "eval_runtime": 1.8322,
330
+ "eval_samples_per_second": 217.776,
331
+ "eval_steps_per_second": 27.29,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 1.8861066102981567,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1459,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8771929824561403,
344
+ "eval_f1": 0.8522278069611882,
345
+ "eval_loss": 0.31010520458221436,
346
+ "eval_precision": 0.8513631702756499,
347
+ "eval_recall": 0.8531096563011457,
348
+ "eval_runtime": 1.8303,
349
+ "eval_samples_per_second": 217.993,
350
+ "eval_steps_per_second": 27.317,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 2.291304349899292,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1444,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8822055137844611,
363
+ "eval_f1": 0.855319904024935,
364
+ "eval_loss": 0.3119203448295593,
365
+ "eval_precision": 0.862378106322743,
366
+ "eval_recall": 0.8491543917075832,
367
+ "eval_runtime": 1.8361,
368
+ "eval_samples_per_second": 217.305,
369
+ "eval_steps_per_second": 27.231,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 1.763808250427246,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1384,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8847117794486216,
382
+ "eval_f1": 0.8609292598654301,
383
+ "eval_loss": 0.3090469241142273,
384
+ "eval_precision": 0.8609292598654301,
385
+ "eval_recall": 0.8609292598654301,
386
+ "eval_runtime": 1.8381,
387
+ "eval_samples_per_second": 217.076,
388
+ "eval_steps_per_second": 27.203,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.242632052937492,
396
+ "train_runtime": 627.3825,
397
+ "train_samples_per_second": 115.974,
398
+ "train_steps_per_second": 3.889
399
  }
400
  ],
401
  "logging_steps": 500,