End of training

- README.md +2 -0
- all_results.json +15 -15
- eval_results.json +7 -7
- predict_results.txt +177 -177
- runs/May20_05-56-39_indolem-petl-vm/events.out.tfevents.1716186904.indolem-petl-vm.2737008.1 +3 -0
- test_results.json +4 -4
- train_results.json +4 -4
- trainer_state.json +202 -202
README.md
CHANGED
@@ -1,4 +1,6 @@
 ---
+language:
+- id
 license: mit
 base_model: indolem/indobert-base-uncased
 tags:
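The metadata above belongs to a model card for what appears to be a binary text classifier fine-tuned from indolem/indobert-base-uncased (the prediction file later in this commit contains only 0/1 labels). A minimal sketch of loading such a checkpoint with the transformers library, assuming a sequence-classification head; the repository id below is a placeholder, not the actual repo name:

```python
# Minimal sketch: load a fine-tuned IndoBERT classifier with transformers.
# "your-username/indobert-finetuned" is a placeholder, not the actual repo id.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

repo_id = "your-username/indobert-finetuned"  # placeholder repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)

inputs = tokenizer("contoh kalimat dalam bahasa Indonesia", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
prediction = logits.argmax(dim=-1).item()  # 0 or 1, as in predict_results.txt
print(prediction)
```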
all_results.json
CHANGED
@@ -1,21 +1,21 @@
 {
-    "accuracy": 0.
+    "accuracy": 0.9090009891196835,
     "epoch": 20.0,
     "eval_accuracy": 0.9022556390977443,
-    "eval_f1": 0.
-    "eval_loss": 0.
-    "eval_precision": 0.
-    "eval_recall": 0.
-    "eval_runtime": 5.
+    "eval_f1": 0.8811928811928812,
+    "eval_loss": 0.29284632205963135,
+    "eval_precision": 0.8842105263157894,
+    "eval_recall": 0.878341516639389,
+    "eval_runtime": 5.5622,
     "eval_samples": 399,
-    "eval_samples_per_second":
-    "eval_steps_per_second": 8.
-    "f1": 0.
-    "precision": 0.
-    "recall": 0.
-    "train_loss": 0.
-    "train_runtime":
+    "eval_samples_per_second": 71.734,
+    "eval_steps_per_second": 8.989,
+    "f1": 0.8907735522904062,
+    "precision": 0.8900210970464135,
+    "recall": 0.8915373175070833,
+    "train_loss": 0.2130153269064231,
+    "train_runtime": 2280.4037,
     "train_samples": 3638,
-    "train_samples_per_second": 31.
-    "train_steps_per_second": 1.
+    "train_samples_per_second": 31.907,
+    "train_steps_per_second": 1.07
 }
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
 {
     "epoch": 20.0,
     "eval_accuracy": 0.9022556390977443,
-    "eval_f1": 0.
-    "eval_loss": 0.
-    "eval_precision": 0.
-    "eval_recall": 0.
-    "eval_runtime": 5.
+    "eval_f1": 0.8811928811928812,
+    "eval_loss": 0.29284632205963135,
+    "eval_precision": 0.8842105263157894,
+    "eval_recall": 0.878341516639389,
+    "eval_runtime": 5.5622,
     "eval_samples": 399,
-    "eval_samples_per_second":
-    "eval_steps_per_second": 8.
+    "eval_samples_per_second": 71.734,
+    "eval_steps_per_second": 8.989
 }
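all_results.json and eval_results.json are plain JSON, so the committed metrics can be read straight back. A small sketch, assuming a local checkout of this repository with paths relative to its root:

```python
# Read the committed metric files back from a local checkout of this repository.
# File paths are assumed to be relative to the repository root.
import json

with open("eval_results.json") as f:
    eval_metrics = json.load(f)
with open("all_results.json") as f:
    all_metrics = json.load(f)

print("eval_accuracy:", eval_metrics["eval_accuracy"])
print("eval_f1:      ", eval_metrics["eval_f1"])
# The top-level "accuracy"/"f1" in all_results.json match test_results.json below.
print("accuracy:     ", all_metrics["accuracy"])
print("f1:           ", all_metrics["f1"])
```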
predict_results.txt
CHANGED
index prediction
[per-row diff omitted: 177 of the 1003 prediction rows (indices 0-1002, binary 0/1 labels) changed in this commit; the remaining rows are unchanged]
runs/May20_05-56-39_indolem-petl-vm/events.out.tfevents.1716186904.indolem-petl-vm.2737008.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9636c835e603941ce911ab7e6300870d7602cc9d7dbe5130e7d4590b71f43a6a
+size 560
test_results.json
CHANGED
@@ -1,6 +1,6 @@
 {
-    "accuracy": 0.
-    "f1": 0.
-    "precision": 0.
-    "recall": 0.
+    "accuracy": 0.9090009891196835,
+    "f1": 0.8907735522904062,
+    "precision": 0.8900210970464135,
+    "recall": 0.8915373175070833
 }
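The accuracy, F1, precision and recall above are presumably scored from predictions like those in predict_results.txt against gold labels. A sketch of how such metrics could be recomputed with scikit-learn; the gold-label file name and the macro averaging are assumptions for illustration, not something stated in this commit:

```python
# Recompute classification metrics from a predictions file shaped like
# predict_results.txt ("index prediction" header, then one "index label" row per line).
# "test_gold_labels.txt" and average="macro" are assumptions for illustration.
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def read_labels(path):
    with open(path) as f:
        next(f)  # skip the "index prediction" header row
        return [int(line.split()[1]) for line in f if line.strip()]

y_pred = read_labels("predict_results.txt")
y_true = read_labels("test_gold_labels.txt")  # hypothetical gold-label file

acc = accuracy_score(y_true, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="macro")
print({"accuracy": acc, "f1": f1, "precision": prec, "recall": rec})
```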
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 20.0,
-    "train_loss": 0.
-    "train_runtime":
+    "train_loss": 0.2130153269064231,
+    "train_runtime": 2280.4037,
     "train_samples": 3638,
-    "train_samples_per_second": 31.
-    "train_steps_per_second": 1.
+    "train_samples_per_second": 31.907,
+    "train_steps_per_second": 1.07
 }
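The throughput figures in train_results.json are internally consistent with the run recorded in trainer_state.json (2440 optimizer steps over 20 epochs of 3638 samples). A quick arithmetic check, under the assumption that samples and steps per second are simply totals divided by train_runtime:

```python
# Sanity-check the reported throughput from the values in this commit.
# Assumes samples/steps per second = total samples or steps / train_runtime.
train_samples = 3638       # from train_results.json
epochs = 20.0              # from train_results.json
train_runtime = 2280.4037  # seconds, from train_results.json
total_steps = 2440         # final "step" in trainer_state.json

samples_per_second = train_samples * epochs / train_runtime
steps_per_second = total_steps / train_runtime
print(round(samples_per_second, 3))  # ~31.907, matching train_samples_per_second
print(round(steps_per_second, 3))    # ~1.07, matching train_steps_per_second
```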
trainer_state.json
CHANGED
@@ -10,392 +10,392 @@
 "log_history": [
[per-epoch diff summarized: the logged "grad_norm", "loss" and eval_* metric values for the 20 epochs (steps 122-2440) were updated; in the new values the training loss falls from 0.5535 at epoch 1 to 0.1104 at epoch 20, and eval_f1 rises from 0.6372727272727272 at epoch 1 to 0.8811928811928812 at epoch 20. The final log entries now read:]
     {
       "epoch": 20.0,
       "eval_accuracy": 0.9022556390977443,
       "eval_f1": 0.8811928811928812,
       "eval_loss": 0.29284632205963135,
       "eval_precision": 0.8842105263157894,
       "eval_recall": 0.878341516639389,
       "eval_runtime": 5.5868,
       "eval_samples_per_second": 71.418,
       "eval_steps_per_second": 8.95,
       "step": 2440
     },
     {
       "epoch": 20.0,
       "step": 2440,
       "total_flos": 8551203605328000.0,
       "train_loss": 0.2130153269064231,
       "train_runtime": 2280.4037,
       "train_samples_per_second": 31.907,
       "train_steps_per_second": 1.07
     }
 ],
 "logging_steps": 500,
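Since trainer_state.json keeps the whole log_history shown above, the per-epoch evaluation curve can be pulled out of it directly. A minimal sketch, assuming a local copy of the file:

```python
# Extract the per-epoch evaluation curve from trainer_state.json's log_history.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Evaluation entries are the log_history records that carry eval_* keys.
for entry in state["log_history"]:
    if "eval_f1" in entry:
        print(f"epoch {entry['epoch']:>4}: "
              f"eval_loss={entry['eval_loss']:.4f} "
              f"eval_f1={entry['eval_f1']:.4f}")
```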