Rodrigo1771 committed on
Commit
e7b0990
1 Parent(s): 7556a63

End of training

Browse files
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: michiyasunaga/BioLinkBERT-base
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
- - drugtemist-en-fasttext-75-ner
9
  metrics:
10
  - precision
11
  - recall
@@ -18,24 +19,24 @@ model-index:
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
- name: drugtemist-en-fasttext-75-ner
22
- type: drugtemist-en-fasttext-75-ner
23
  config: DrugTEMIST English NER
24
  split: validation
25
  args: DrugTEMIST English NER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
- value: 0.9194876486733761
30
  - name: Recall
31
  type: recall
32
- value: 0.9366262814538676
33
  - name: F1
34
  type: f1
35
- value: 0.92797783933518
36
  - name: Accuracy
37
  type: accuracy
38
- value: 0.9987511511734993
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # output
45
 
46
- This model is a fine-tuned version of [michiyasunaga/BioLinkBERT-base](https://huggingface.co/michiyasunaga/BioLinkBERT-base) on the drugtemist-en-fasttext-75-ner dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.0077
49
- - Precision: 0.9195
50
- - Recall: 0.9366
51
- - F1: 0.9280
52
  - Accuracy: 0.9988
53
 
54
  ## Model description
 
3
  license: apache-2.0
4
  base_model: michiyasunaga/BioLinkBERT-base
5
  tags:
6
+ - token-classification
7
  - generated_from_trainer
8
  datasets:
9
+ - Rodrigo1771/drugtemist-en-fasttext-75-ner
10
  metrics:
11
  - precision
12
  - recall
 
19
  name: Token Classification
20
  type: token-classification
21
  dataset:
22
+ name: Rodrigo1771/drugtemist-en-fasttext-75-ner
23
+ type: Rodrigo1771/drugtemist-en-fasttext-75-ner
24
  config: DrugTEMIST English NER
25
  split: validation
26
  args: DrugTEMIST English NER
27
  metrics:
28
  - name: Precision
29
  type: precision
30
+ value: 0.9249771271729186
31
  - name: Recall
32
  type: recall
33
+ value: 0.9422180801491147
34
  - name: F1
35
  type: f1
36
+ value: 0.9335180055401663
37
  - name: Accuracy
38
  type: accuracy
39
+ value: 0.998772081600759
40
  ---
41
 
42
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
44
 
45
  # output
46
 
47
+ This model is a fine-tuned version of [michiyasunaga/BioLinkBERT-base](https://huggingface.co/michiyasunaga/BioLinkBERT-base) on the Rodrigo1771/drugtemist-en-fasttext-75-ner dataset.
48
  It achieves the following results on the evaluation set:
49
+ - Loss: 0.0076
50
+ - Precision: 0.9250
51
+ - Recall: 0.9422
52
+ - F1: 0.9335
53
  - Accuracy: 0.9988
54
 
55
  ## Model description
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
- "epoch": 9.967845659163988,
3
- "eval_accuracy": 0.9496615226667522,
4
- "eval_f1": 0.6946045049764275,
5
- "eval_loss": 0.24502409994602203,
6
- "eval_precision": 0.6659969864389754,
7
- "eval_recall": 0.7257799671592775,
8
- "eval_runtime": 5.9273,
9
- "eval_samples": 2519,
10
- "eval_samples_per_second": 424.981,
11
- "eval_steps_per_second": 53.144,
12
- "predict_accuracy": 0.9468015056363083,
13
- "predict_f1": 0.6798307475317349,
14
- "predict_loss": 0.26582667231559753,
15
- "predict_precision": 0.6624923640806353,
16
- "predict_recall": 0.6981010621177985,
17
- "predict_runtime": 9.8107,
18
- "predict_samples_per_second": 412.509,
19
- "predict_steps_per_second": 51.576,
20
- "total_flos": 4644619911314910.0,
21
- "train_loss": 0.050868147861573,
22
- "train_runtime": 855.1929,
23
- "train_samples": 9929,
24
- "train_samples_per_second": 116.102,
25
- "train_steps_per_second": 1.812
26
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.998772081600759,
4
+ "eval_f1": 0.9335180055401663,
5
+ "eval_loss": 0.007628325838595629,
6
+ "eval_precision": 0.9249771271729186,
7
+ "eval_recall": 0.9422180801491147,
8
+ "eval_runtime": 15.1819,
9
+ "eval_samples": 6946,
10
+ "eval_samples_per_second": 457.519,
11
+ "eval_steps_per_second": 57.239,
12
+ "predict_accuracy": 0.9986685364931299,
13
+ "predict_f1": 0.9202592279515356,
14
+ "predict_loss": 0.007816320285201073,
15
+ "predict_precision": 0.8938149972632731,
16
+ "predict_recall": 0.9483159117305459,
17
+ "predict_runtime": 28.7456,
18
+ "predict_samples_per_second": 511.904,
19
+ "predict_steps_per_second": 64.01,
20
+ "total_flos": 1.394810359803495e+16,
21
+ "train_loss": 0.0028968164414402532,
22
+ "train_runtime": 2196.5741,
23
+ "train_samples": 32447,
24
+ "train_samples_per_second": 147.716,
25
+ "train_steps_per_second": 2.308
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 9.967845659163988,
3
- "eval_accuracy": 0.9496615226667522,
4
- "eval_f1": 0.6946045049764275,
5
- "eval_loss": 0.24502409994602203,
6
- "eval_precision": 0.6659969864389754,
7
- "eval_recall": 0.7257799671592775,
8
- "eval_runtime": 5.9273,
9
- "eval_samples": 2519,
10
- "eval_samples_per_second": 424.981,
11
- "eval_steps_per_second": 53.144
12
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.998772081600759,
4
+ "eval_f1": 0.9335180055401663,
5
+ "eval_loss": 0.007628325838595629,
6
+ "eval_precision": 0.9249771271729186,
7
+ "eval_recall": 0.9422180801491147,
8
+ "eval_runtime": 15.1819,
9
+ "eval_samples": 6946,
10
+ "eval_samples_per_second": 457.519,
11
+ "eval_steps_per_second": 57.239
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9468015056363083,
3
- "predict_f1": 0.6798307475317349,
4
- "predict_loss": 0.26582667231559753,
5
- "predict_precision": 0.6624923640806353,
6
- "predict_recall": 0.6981010621177985,
7
- "predict_runtime": 9.8107,
8
- "predict_samples_per_second": 412.509,
9
- "predict_steps_per_second": 51.576
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9986685364931299,
3
+ "predict_f1": 0.9202592279515356,
4
+ "predict_loss": 0.007816320285201073,
5
+ "predict_precision": 0.8938149972632731,
6
+ "predict_recall": 0.9483159117305459,
7
+ "predict_runtime": 28.7456,
8
+ "predict_samples_per_second": 511.904,
9
+ "predict_steps_per_second": 64.01
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725888457.0a1c9bec2a53.24273.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb5583c37fdbd9ea659142a09fba0fae95a78d6f0eaac64a0292bc00e64215d
3
+ size 560
train.log CHANGED
@@ -1429,3 +1429,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1429
  {'eval_loss': 0.00769586768001318, 'eval_precision': 0.9194876486733761, 'eval_recall': 0.9366262814538676, 'eval_f1': 0.92797783933518, 'eval_accuracy': 0.9987511511734993, 'eval_runtime': 15.2047, 'eval_samples_per_second': 456.832, 'eval_steps_per_second': 57.153, 'epoch': 10.0}
1430
  {'train_runtime': 2196.5741, 'train_samples_per_second': 147.716, 'train_steps_per_second': 2.308, 'train_loss': 0.0028968164414402532, 'epoch': 10.0}
1431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1432
  0%| | 0/869 [00:00<?, ?it/s]
1433
  1%| | 10/869 [00:00<00:09, 93.14it/s]
1434
  2%|▏ | 20/869 [00:00<00:10, 79.68it/s]
1435
  3%|▎ | 29/869 [00:00<00:10, 78.36it/s]
1436
  4%|▍ | 37/869 [00:00<00:11, 75.35it/s]
1437
  5%|▌ | 46/869 [00:00<00:10, 78.54it/s]
1438
  6%|▋ | 55/869 [00:00<00:10, 81.34it/s]
1439
  7%|▋ | 64/869 [00:00<00:10, 77.39it/s]
1440
  8%|▊ | 72/869 [00:00<00:10, 77.10it/s]
1441
  9%|▉ | 81/869 [00:01<00:09, 80.79it/s]
1442
  10%|█ | 90/869 [00:01<00:09, 83.27it/s]
1443
  12%|█▏ | 100/869 [00:01<00:08, 85.89it/s]
1444
  13%|█▎ | 109/869 [00:01<00:09, 82.19it/s]
1445
  14%|█▎ | 118/869 [00:01<00:09, 80.71it/s]
1446
  15%|█▍ | 127/869 [00:01<00:09, 79.46it/s]
1447
  16%|█▌ | 136/869 [00:01<00:09, 80.95it/s]
1448
  17%|█▋ | 145/869 [00:01<00:09, 76.58it/s]
1449
  18%|█▊ | 154/869 [00:01<00:09, 78.77it/s]
1450
  19%|█▉ | 163/869 [00:02<00:08, 78.83it/s]
1451
  20%|█▉ | 171/869 [00:02<00:09, 76.42it/s]
1452
  21%|██ | 179/869 [00:02<00:09, 75.56it/s]
1453
  22%|██▏ | 188/869 [00:02<00:08, 77.42it/s]
1454
  23%|██▎ | 196/869 [00:02<00:09, 74.78it/s]
1455
  24%|██▎ | 205/869 [00:02<00:08, 77.47it/s]
1456
  25%|██▍ | 214/869 [00:02<00:08, 79.04it/s]
1457
  26%|██▌ | 223/869 [00:02<00:08, 80.38it/s]
1458
  27%|██▋ | 232/869 [00:02<00:08, 76.59it/s]
1459
  28%|██▊ | 240/869 [00:03<00:08, 77.10it/s]
1460
  29%|██▊ | 248/869 [00:03<00:08, 71.77it/s]
1461
  30%|██▉ | 257/869 [00:03<00:08, 74.54it/s]
1462
  30%|███ | 265/869 [00:03<00:08, 73.96it/s]
1463
  31%|███▏ | 273/869 [00:03<00:07, 74.71it/s]
1464
  32%|███▏ | 281/869 [00:03<00:07, 74.68it/s]
1465
  33%|███▎ | 289/869 [00:03<00:07, 73.95it/s]
1466
  34%|███▍ | 297/869 [00:03<00:07, 74.29it/s]
1467
  35%|███▌ | 305/869 [00:03<00:08, 70.32it/s]
1468
  36%|███▌ | 313/869 [00:04<00:07, 71.72it/s]
1469
  37%|███▋ | 321/869 [00:04<00:07, 71.00it/s]
1470
  38%|███▊ | 330/869 [00:04<00:07, 74.74it/s]
1471
  39%|███▉ | 339/869 [00:04<00:06, 76.71it/s]
1472
  40%|███▉ | 347/869 [00:04<00:07, 72.09it/s]
1473
  41%|████ | 355/869 [00:04<00:07, 72.99it/s]
1474
  42%|████▏ | 364/869 [00:04<00:06, 76.76it/s]
1475
  43%|████▎ | 373/869 [00:04<00:06, 74.39it/s]
1476
  44%|████▍ | 381/869 [00:04<00:06, 75.28it/s]
1477
  45%|████▍ | 389/869 [00:05<00:06, 70.47it/s]
1478
  46%|████▌ | 398/869 [00:05<00:06, 73.16it/s]
1479
  47%|████▋ | 407/869 [00:05<00:06, 75.53it/s]
1480
  48%|████▊ | 416/869 [00:05<00:05, 78.44it/s]
1481
  49%|████▉ | 424/869 [00:05<00:05, 78.67it/s]
1482
  50%|████▉ | 433/869 [00:05<00:05, 79.51it/s]
1483
  51%|█████ | 441/869 [00:05<00:05, 77.23it/s]
1484
  52%|█████▏ | 449/869 [00:05<00:05, 75.66it/s]
1485
  53%|█████▎ | 457/869 [00:05<00:05, 76.56it/s]
1486
  54%|█████▎ | 465/869 [00:06<00:05, 74.67it/s]
1487
  55%|█████▍ | 475/869 [00:06<00:04, 79.79it/s]
1488
  56%|█████▌ | 484/869 [00:06<00:04, 82.19it/s]
1489
  57%|█████▋ | 493/869 [00:06<00:04, 76.64it/s]
1490
  58%|█████▊ | 501/869 [00:06<00:04, 76.35it/s]
1491
  59%|█████▊ | 509/869 [00:06<00:04, 73.64it/s]
1492
  60%|█████▉ | 518/869 [00:06<00:04, 76.46it/s]
1493
  61%|██████ | 526/869 [00:06<00:04, 71.73it/s]
1494
  61%|██████▏ | 534/869 [00:06<00:04, 73.69it/s]
1495
  62%|██████▏ | 542/869 [00:07<00:04, 70.92it/s]
1496
  63%|██████▎ | 551/869 [00:07<00:04, 74.82it/s]
1497
  64%|██████▍ | 559/869 [00:07<00:04, 74.53it/s]
1498
  65%|██████▌ | 567/869 [00:07<00:04, 73.79it/s]
1499
  66%|██████▌ | 575/869 [00:07<00:03, 74.82it/s]
1500
  67%|██████▋ | 583/869 [00:07<00:03, 73.69it/s]
1501
  68%|██████▊ | 591/869 [00:07<00:03, 74.15it/s]
1502
  69%|██████▉ | 600/869 [00:07<00:03, 77.09it/s]
1503
  70%|███████ | 609/869 [00:07<00:03, 78.40it/s]
1504
  71%|███████ | 617/869 [00:08<00:03, 77.00it/s]
1505
  72%|███████▏ | 625/869 [00:08<00:03, 75.42it/s]
1506
  73%|███████▎ | 633/869 [00:08<00:03, 75.64it/s]
1507
  74%|███████▍ | 641/869 [00:08<00:03, 75.26it/s]
1508
  75%|███████▍ | 650/869 [00:08<00:02, 78.48it/s]
1509
  76%|███████▌ | 658/869 [00:08<00:02, 77.25it/s]
1510
  77%|███████▋ | 667/869 [00:08<00:02, 78.48it/s]
1511
  78%|███████▊ | 677/869 [00:08<00:02, 82.17it/s]
1512
  79%|███████▉ | 686/869 [00:09<00:02, 70.63it/s]
1513
  80%|███████▉ | 694/869 [00:09<00:02, 72.95it/s]
1514
  81%|████████ | 702/869 [00:09<00:02, 72.96it/s]
1515
  82%|████████▏ | 711/869 [00:09<00:02, 75.37it/s]
1516
  83%|████████▎ | 719/869 [00:09<00:01, 75.52it/s]
1517
  84%|████████▍ | 728/869 [00:09<00:01, 77.16it/s]
1518
  85%|████████▍ | 736/869 [00:09<00:01, 77.53it/s]
1519
  86%|████████▌ | 744/869 [00:09<00:01, 77.74it/s]
1520
  87%|████████▋ | 752/869 [00:09<00:01, 74.45it/s]
1521
  88%|████████▊ | 761/869 [00:09<00:01, 77.76it/s]
1522
  88%|████████▊ | 769/869 [00:10<00:01, 73.83it/s]
1523
  89%|████████▉ | 777/869 [00:10<00:01, 65.48it/s]
1524
  90%|█████████ | 785/869 [00:10<00:01, 67.59it/s]
1525
  91%|█████████▏| 794/869 [00:10<00:01, 71.22it/s]
1526
  92%|█████████▏| 802/869 [00:10<00:00, 73.53it/s]
1527
  93%|█████████▎| 810/869 [00:10<00:00, 74.76it/s]
1528
  94%|█████████▍| 818/869 [00:10<00:00, 75.19it/s]
1529
  95%|█████████▌| 826/869 [00:10<00:00, 75.37it/s]
1530
  96%|█████████▌| 834/869 [00:10<00:00, 74.91it/s]
1531
  97%|█████████▋| 843/869 [00:11<00:00, 77.97it/s]
1532
  98%|█████████▊| 852/869 [00:11<00:00, 79.15it/s]
1533
  99%|█████████▉| 860/869 [00:11<00:00, 74.82it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1534
  0%| | 0/1840 [00:00<?, ?it/s]
1535
  1%| | 11/1840 [00:00<00:18, 98.85it/s]
1536
  1%| | 21/1840 [00:00<00:22, 79.68it/s]
1537
  2%|▏ | 30/1840 [00:00<00:22, 79.32it/s]
1538
  2%|▏ | 39/1840 [00:00<00:22, 79.99it/s]
1539
  3%|▎ | 48/1840 [00:00<00:22, 81.01it/s]
1540
  3%|▎ | 57/1840 [00:00<00:22, 80.70it/s]
1541
  4%|▎ | 66/1840 [00:00<00:22, 78.15it/s]
1542
  4%|▍ | 74/1840 [00:00<00:22, 78.25it/s]
1543
  5%|▍ | 83/1840 [00:01<00:21, 80.09it/s]
1544
  5%|▌ | 92/1840 [00:01<00:21, 79.74it/s]
1545
  6%|▌ | 102/1840 [00:01<00:20, 83.78it/s]
1546
  6%|▌ | 111/1840 [00:01<00:20, 84.96it/s]
1547
  7%|▋ | 120/1840 [00:01<00:20, 83.45it/s]
1548
  7%|▋ | 129/1840 [00:01<00:21, 79.72it/s]
1549
  8%|▊ | 138/1840 [00:01<00:20, 81.35it/s]
1550
  8%|▊ | 147/1840 [00:01<00:20, 82.23it/s]
1551
  8%|▊ | 156/1840 [00:01<00:21, 78.29it/s]
1552
  9%|▉ | 164/1840 [00:02<00:21, 76.51it/s]
1553
  9%|▉ | 173/1840 [00:02<00:21, 78.43it/s]
1554
  10%|▉ | 183/1840 [00:02<00:19, 82.86it/s]
1555
  10%|█ | 192/1840 [00:02<00:19, 84.17it/s]
1556
  11%|█ | 201/1840 [00:02<00:19, 82.31it/s]
1557
  11%|█▏ | 210/1840 [00:02<00:20, 79.83it/s]
1558
  12%|█▏ | 219/1840 [00:02<00:20, 80.69it/s]
1559
  12%|█▏ | 228/1840 [00:02<00:19, 82.07it/s]
1560
  13%|█▎ | 237/1840 [00:02<00:21, 76.28it/s]
1561
  13%|█▎ | 246/1840 [00:03<00:20, 78.24it/s]
1562
  14%|█▍ | 254/1840 [00:03<00:20, 78.47it/s]
1563
  14%|█▍ | 262/1840 [00:03<00:20, 78.54it/s]
1564
  15%|█▍ | 270/1840 [00:03<00:20, 78.47it/s]
1565
  15%|█▌ | 280/1840 [00:03<00:18, 83.71it/s]
1566
  16%|█▌ | 290/1840 [00:03<00:18, 83.65it/s]
1567
  16%|█▋ | 299/1840 [00:03<00:18, 85.16it/s]
1568
  17%|█▋ | 308/1840 [00:03<00:18, 83.08it/s]
1569
  17%|█▋ | 317/1840 [00:03<00:17, 84.68it/s]
1570
  18%|█▊ | 326/1840 [00:04<00:18, 83.75it/s]
1571
  18%|█▊ | 335/1840 [00:04<00:18, 82.34it/s]
1572
  19%|█▉ | 345/1840 [00:04<00:17, 84.84it/s]
1573
  19%|█▉ | 354/1840 [00:04<00:18, 80.58it/s]
1574
  20%|█▉ | 363/1840 [00:04<00:18, 80.45it/s]
1575
  20%|██ | 373/1840 [00:04<00:17, 82.94it/s]
1576
  21%|██ | 382/1840 [00:04<00:17, 84.75it/s]
1577
  21%|██▏ | 391/1840 [00:04<00:17, 83.12it/s]
1578
  22%|██▏ | 400/1840 [00:04<00:17, 81.50it/s]
1579
  22%|██▏ | 409/1840 [00:05<00:17, 82.12it/s]
1580
  23%|██▎ | 418/1840 [00:05<00:17, 79.95it/s]
1581
  23%|██▎ | 427/1840 [00:05<00:17, 80.56it/s]
1582
  24%|██▎ | 436/1840 [00:05<00:17, 79.79it/s]
1583
  24%|██▍ | 445/1840 [00:05<00:17, 80.57it/s]
1584
  25%|██▍ | 454/1840 [00:05<00:17, 80.30it/s]
1585
  25%|██▌ | 463/1840 [00:05<00:16, 82.64it/s]
1586
  26%|██▌ | 472/1840 [00:05<00:16, 84.64it/s]
1587
  26%|██▌ | 481/1840 [00:05<00:17, 76.17it/s]
1588
  27%|██▋ | 490/1840 [00:06<00:17, 79.25it/s]
1589
  27%|██▋ | 499/1840 [00:06<00:17, 76.61it/s]
1590
  28%|██▊ | 508/1840 [00:06<00:17, 77.81it/s]
1591
  28%|██▊ | 518/1840 [00:06<00:16, 81.49it/s]
1592
  29%|██▊ | 527/1840 [00:06<00:15, 82.26it/s]
1593
  29%|██▉ | 536/1840 [00:06<00:16, 81.29it/s]
1594
  30%|██▉ | 545/1840 [00:06<00:15, 81.42it/s]
1595
  30%|███ | 554/1840 [00:06<00:15, 82.78it/s]
1596
  31%|███ | 563/1840 [00:06<00:15, 80.10it/s]
1597
  31%|███ | 573/1840 [00:07<00:15, 83.39it/s]
1598
  32%|███▏ | 583/1840 [00:07<00:14, 85.46it/s]
1599
  32%|███▏ | 592/1840 [00:07<00:15, 82.90it/s]
1600
  33%|███▎ | 601/1840 [00:07<00:15, 81.01it/s]
1601
  33%|███▎ | 611/1840 [00:07<00:14, 82.55it/s]
1602
  34%|███▎ | 620/1840 [00:07<00:15, 79.88it/s]
1603
  34%|███▍ | 629/1840 [00:07<00:15, 79.21it/s]
1604
  35%|███▍ | 639/1840 [00:07<00:14, 83.45it/s]
1605
  35%|███▌ | 649/1840 [00:07<00:13, 85.57it/s]
1606
  36%|███▌ | 658/1840 [00:08<00:14, 81.64it/s]
1607
  36%|███▋ | 667/1840 [00:08<00:14, 80.85it/s]
1608
  37%|███▋ | 677/1840 [00:08<00:13, 84.92it/s]
1609
  37%|███▋ | 686/1840 [00:08<00:13, 82.87it/s]
1610
  38%|███▊ | 695/1840 [00:08<00:13, 83.04it/s]
1611
  38%|███▊ | 705/1840 [00:08<00:13, 86.04it/s]
1612
  39%|███▉ | 714/1840 [00:08<00:13, 84.27it/s]
1613
  39%|███▉ | 724/1840 [00:08<00:12, 87.27it/s]
1614
  40%|███▉ | 734/1840 [00:08<00:12, 89.45it/s]
1615
  40%|████ | 743/1840 [00:09<00:12, 89.01it/s]
1616
  41%|████ | 752/1840 [00:09<00:12, 86.83it/s]
1617
  41%|████▏ | 762/1840 [00:09<00:12, 89.06it/s]
1618
  42%|████▏ | 771/1840 [00:09<00:12, 87.52it/s]
1619
  42%|████▏ | 780/1840 [00:09<00:12, 87.85it/s]
1620
  43%|████▎ | 789/1840 [00:09<00:12, 84.43it/s]
1621
  43%|████▎ | 798/1840 [00:09<00:12, 84.40it/s]
1622
  44%|████▍ | 807/1840 [00:09<00:12, 82.54it/s]
1623
  44%|████▍ | 816/1840 [00:09<00:12, 83.63it/s]
1624
  45%|████▍ | 825/1840 [00:10<00:12, 84.00it/s]
1625
  45%|████▌ | 835/1840 [00:10<00:11, 87.17it/s]
1626
  46%|████▌ | 844/1840 [00:10<00:12, 82.78it/s]
1627
  46%|████▋ | 853/1840 [00:10<00:11, 84.72it/s]
1628
  47%|████▋ | 863/1840 [00:10<00:11, 86.79it/s]
1629
  47%|████▋ | 873/1840 [00:10<00:11, 86.72it/s]
1630
  48%|████▊ | 882/1840 [00:10<00:11, 82.75it/s]
1631
  48%|████▊ | 891/1840 [00:10<00:11, 82.52it/s]
1632
  49%|████▉ | 900/1840 [00:10<00:11, 81.40it/s]
1633
  49%|████▉ | 910/1840 [00:11<00:10, 85.07it/s]
1634
  50%|████▉ | 919/1840 [00:11<00:10, 86.09it/s]
1635
  50%|█████ | 929/1840 [00:11<00:10, 87.65it/s]
1636
  51%|█████ | 938/1840 [00:11<00:10, 84.08it/s]
1637
  51%|█████▏ | 947/1840 [00:11<00:10, 84.62it/s]
1638
  52%|█████▏ | 956/1840 [00:11<00:10, 81.28it/s]
1639
  52%|█████▏ | 965/1840 [00:11<00:10, 81.73it/s]
1640
  53%|█████▎ | 975/1840 [00:11<00:10, 84.56it/s]
1641
  53%|█████▎ | 984/1840 [00:11<00:10, 81.54it/s]
1642
  54%|█████▍ | 994/1840 [00:12<00:10, 84.11it/s]
1643
  55%|█████▍ | 1003/1840 [00:12<00:10, 82.91it/s]
1644
  55%|█████▌ | 1012/1840 [00:12<00:10, 81.54it/s]
1645
  55%|█████▌ | 1021/1840 [00:12<00:09, 82.96it/s]
1646
  56%|█████▌ | 1031/1840 [00:12<00:09, 86.38it/s]
1647
  57%|█████▋ | 1041/1840 [00:12<00:09, 86.05it/s]
1648
  57%|█████▋ | 1050/1840 [00:12<00:09, 85.53it/s]
1649
  58%|█████▊ | 1059/1840 [00:12<00:09, 84.24it/s]
1650
  58%|█████▊ | 1068/1840 [00:12<00:09, 83.89it/s]
1651
  59%|█████▊ | 1078/1840 [00:13<00:08, 86.49it/s]
1652
  59%|█████▉ | 1088/1840 [00:13<00:08, 88.79it/s]
1653
  60%|█████▉ | 1098/1840 [00:13<00:08, 90.45it/s]
1654
  60%|██████ | 1108/1840 [00:13<00:08, 90.80it/s]
1655
  61%|██████ | 1118/1840 [00:13<00:08, 86.64it/s]
1656
  61%|██████▏ | 1127/1840 [00:13<00:08, 85.05it/s]
1657
  62%|██████▏ | 1137/1840 [00:13<00:08, 86.61it/s]
1658
  62%|██████▏ | 1146/1840 [00:13<00:07, 87.34it/s]
1659
  63%|██████▎ | 1155/1840 [00:13<00:07, 87.26it/s]
1660
  63%|██████▎ | 1165/1840 [00:14<00:07, 88.13it/s]
1661
  64%|██████▍ | 1174/1840 [00:14<00:07, 83.58it/s]
1662
  64%|██████▍ | 1183/1840 [00:14<00:07, 84.91it/s]
1663
  65%|██████▍ | 1192/1840 [00:14<00:07, 81.30it/s]
1664
  65%|██████▌ | 1202/1840 [00:14<00:07, 85.01it/s]
1665
  66%|██████▌ | 1211/1840 [00:14<00:07, 84.77it/s]
1666
  66%|██████▋ | 1220/1840 [00:14<00:07, 80.78it/s]
1667
  67%|██████▋ | 1229/1840 [00:14<00:07, 77.99it/s]
1668
  67%|██████▋ | 1239/1840 [00:14<00:07, 82.43it/s]
1669
  68%|██████▊ | 1249/1840 [00:15<00:07, 84.02it/s]
1670
  68%|██████▊ | 1259/1840 [00:15<00:06, 86.05it/s]
1671
  69%|██████▉ | 1268/1840 [00:15<00:07, 81.33it/s]
1672
  69%|██████▉ | 1277/1840 [00:15<00:06, 81.61it/s]
1673
  70%|██████▉ | 1286/1840 [00:15<00:06, 83.73it/s]
1674
  70%|███████ | 1295/1840 [00:15<00:06, 85.09it/s]
1675
  71%|███████ | 1305/1840 [00:15<00:06, 87.44it/s]
1676
  71%|███████▏ | 1315/1840 [00:15<00:05, 89.24it/s]
1677
  72%|███████▏ | 1325/1840 [00:15<00:05, 90.82it/s]
1678
  73%|███████▎ | 1335/1840 [00:16<00:05, 89.40it/s]
1679
  73%|███████▎ | 1344/1840 [00:16<00:05, 88.30it/s]
1680
  74%|███████▎ | 1353/1840 [00:16<00:05, 86.55it/s]
1681
  74%|███████▍ | 1362/1840 [00:16<00:05, 85.78it/s]
1682
  75%|███████▍ | 1372/1840 [00:16<00:05, 87.53it/s]
1683
  75%|███████▌ | 1381/1840 [00:16<00:05, 83.66it/s]
1684
  76%|███████▌ | 1391/1840 [00:16<00:05, 86.29it/s]
1685
  76%|███████▌ | 1400/1840 [00:16<00:05, 84.99it/s]
1686
  77%|███████▋ | 1410/1840 [00:16<00:04, 87.43it/s]
1687
  77%|███████▋ | 1420/1840 [00:17<00:04, 89.10it/s]
1688
  78%|███████▊ | 1429/1840 [00:17<00:04, 88.03it/s]
1689
  78%|███████▊ | 1438/1840 [00:17<00:04, 83.40it/s]
1690
  79%|███████▊ | 1447/1840 [00:17<00:04, 80.09it/s]
1691
  79%|███████▉ | 1456/1840 [00:17<00:04, 79.92it/s]
1692
  80%|███████▉ | 1466/1840 [00:17<00:04, 82.65it/s]
1693
  80%|████████ | 1475/1840 [00:17<00:04, 83.62it/s]
1694
  81%|████████ | 1484/1840 [00:17<00:04, 78.30it/s]
1695
  81%|████████ | 1493/1840 [00:17<00:04, 80.11it/s]
1696
  82%|████████▏ | 1502/1840 [00:18<00:04, 75.31it/s]
1697
  82%|████████▏ | 1511/1840 [00:18<00:04, 75.48it/s]
1698
  83%|████████▎ | 1520/1840 [00:18<00:04, 77.35it/s]
1699
  83%|████████▎ | 1529/1840 [00:18<00:03, 79.48it/s]
1700
  84%|████████▎ | 1538/1840 [00:18<00:03, 77.28it/s]
1701
  84%|████████▍ | 1547/1840 [00:18<00:03, 78.65it/s]
1702
  85%|████████▍ | 1556/1840 [00:18<00:03, 79.70it/s]
1703
  85%|████████▌ | 1565/1840 [00:18<00:03, 79.11it/s]
1704
  86%|████████▌ | 1574/1840 [00:18<00:03, 79.32it/s]
1705
  86%|████████▌ | 1582/1840 [00:19<00:03, 76.10it/s]
1706
  86%|████████▋ | 1591/1840 [00:19<00:03, 77.80it/s]
1707
  87%|████████▋ | 1599/1840 [00:19<00:03, 76.77it/s]
1708
  87%|████████▋ | 1608/1840 [00:19<00:02, 78.00it/s]
1709
  88%|████████▊ | 1617/1840 [00:19<00:02, 80.43it/s]
1710
  88%|████████▊ | 1626/1840 [00:19<00:02, 81.36it/s]
1711
  89%|████████▉ | 1635/1840 [00:19<00:02, 71.94it/s]
1712
  89%|████████▉ | 1643/1840 [00:19<00:02, 73.05it/s]
1713
  90%|████████▉ | 1652/1840 [00:20<00:02, 75.07it/s]
1714
  90%|█████████ | 1661/1840 [00:20<00:02, 78.13it/s]
1715
  91%|█████████ | 1670/1840 [00:20<00:02, 78.46it/s]
1716
  91%|█████████ | 1678/1840 [00:20<00:02, 78.03it/s]
1717
  92%|█████████▏| 1687/1840 [00:20<00:01, 79.05it/s]
1718
  92%|█████████▏| 1697/1840 [00:20<00:01, 82.97it/s]
1719
  93%|█████████▎| 1706/1840 [00:20<00:01, 80.42it/s]
1720
  93%|█████████▎| 1715/1840 [00:20<00:01, 78.43it/s]
1721
  94%|█████████▎| 1724/1840 [00:20<00:01, 80.51it/s]
1722
  94%|█████████▍| 1733/1840 [00:21<00:01, 77.32it/s]
1723
  95%|█████████▍| 1742/1840 [00:21<00:01, 80.56it/s]
1724
  95%|█████████▌| 1752/1840 [00:21<00:01, 83.60it/s]
1725
  96%|█████████▌| 1761/1840 [00:21<00:00, 84.69it/s]
1726
  96%|█████████▌| 1770/1840 [00:21<00:00, 82.30it/s]
1727
  97%|█████████▋| 1779/1840 [00:21<00:00, 82.74it/s]
1728
  97%|█████████▋| 1788/1840 [00:21<00:00, 81.59it/s]
1729
  98%|█████████▊| 1797/1840 [00:21<00:00, 83.44it/s]
1730
  98%|█████████▊| 1806/1840 [00:21<00:00, 79.67it/s]
1731
  99%|█████████▊| 1815/1840 [00:22<00:00, 77.02it/s]
1732
  99%|█████████▉| 1824/1840 [00:22<00:00, 79.80it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1429
  {'eval_loss': 0.00769586768001318, 'eval_precision': 0.9194876486733761, 'eval_recall': 0.9366262814538676, 'eval_f1': 0.92797783933518, 'eval_accuracy': 0.9987511511734993, 'eval_runtime': 15.2047, 'eval_samples_per_second': 456.832, 'eval_steps_per_second': 57.153, 'epoch': 10.0}
1430
  {'train_runtime': 2196.5741, 'train_samples_per_second': 147.716, 'train_steps_per_second': 2.308, 'train_loss': 0.0028968164414402532, 'epoch': 10.0}
1431
 
1432
+ ***** train metrics *****
1433
+ epoch = 10.0
1434
+ total_flos = 12990183GF
1435
+ train_loss = 0.0029
1436
+ train_runtime = 0:36:36.57
1437
+ train_samples = 32447
1438
+ train_samples_per_second = 147.716
1439
+ train_steps_per_second = 2.308
1440
+ 09/09/2024 13:27:22 - INFO - __main__ - *** Evaluate ***
1441
+ [INFO|trainer.py:811] 2024-09-09 13:27:22,340 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
1442
+ [INFO|trainer.py:3819] 2024-09-09 13:27:22,343 >>
1443
+ ***** Running Evaluation *****
1444
+ [INFO|trainer.py:3821] 2024-09-09 13:27:22,343 >> Num examples = 6946
1445
+ [INFO|trainer.py:3824] 2024-09-09 13:27:22,343 >> Batch size = 8
1446
+
1447
  0%| | 0/869 [00:00<?, ?it/s]
1448
  1%| | 10/869 [00:00<00:09, 93.14it/s]
1449
  2%|▏ | 20/869 [00:00<00:10, 79.68it/s]
1450
  3%|▎ | 29/869 [00:00<00:10, 78.36it/s]
1451
  4%|▍ | 37/869 [00:00<00:11, 75.35it/s]
1452
  5%|▌ | 46/869 [00:00<00:10, 78.54it/s]
1453
  6%|▋ | 55/869 [00:00<00:10, 81.34it/s]
1454
  7%|▋ | 64/869 [00:00<00:10, 77.39it/s]
1455
  8%|▊ | 72/869 [00:00<00:10, 77.10it/s]
1456
  9%|▉ | 81/869 [00:01<00:09, 80.79it/s]
1457
  10%|█ | 90/869 [00:01<00:09, 83.27it/s]
1458
  12%|█▏ | 100/869 [00:01<00:08, 85.89it/s]
1459
  13%|█▎ | 109/869 [00:01<00:09, 82.19it/s]
1460
  14%|█▎ | 118/869 [00:01<00:09, 80.71it/s]
1461
  15%|█▍ | 127/869 [00:01<00:09, 79.46it/s]
1462
  16%|█▌ | 136/869 [00:01<00:09, 80.95it/s]
1463
  17%|█▋ | 145/869 [00:01<00:09, 76.58it/s]
1464
  18%|█▊ | 154/869 [00:01<00:09, 78.77it/s]
1465
  19%|█▉ | 163/869 [00:02<00:08, 78.83it/s]
1466
  20%|█▉ | 171/869 [00:02<00:09, 76.42it/s]
1467
  21%|██ | 179/869 [00:02<00:09, 75.56it/s]
1468
  22%|██▏ | 188/869 [00:02<00:08, 77.42it/s]
1469
  23%|██▎ | 196/869 [00:02<00:09, 74.78it/s]
1470
  24%|██▎ | 205/869 [00:02<00:08, 77.47it/s]
1471
  25%|██▍ | 214/869 [00:02<00:08, 79.04it/s]
1472
  26%|██▌ | 223/869 [00:02<00:08, 80.38it/s]
1473
  27%|██▋ | 232/869 [00:02<00:08, 76.59it/s]
1474
  28%|██▊ | 240/869 [00:03<00:08, 77.10it/s]
1475
  29%|██▊ | 248/869 [00:03<00:08, 71.77it/s]
1476
  30%|██▉ | 257/869 [00:03<00:08, 74.54it/s]
1477
  30%|███ | 265/869 [00:03<00:08, 73.96it/s]
1478
  31%|███▏ | 273/869 [00:03<00:07, 74.71it/s]
1479
  32%|███▏ | 281/869 [00:03<00:07, 74.68it/s]
1480
  33%|███▎ | 289/869 [00:03<00:07, 73.95it/s]
1481
  34%|███▍ | 297/869 [00:03<00:07, 74.29it/s]
1482
  35%|███▌ | 305/869 [00:03<00:08, 70.32it/s]
1483
  36%|███▌ | 313/869 [00:04<00:07, 71.72it/s]
1484
  37%|███▋ | 321/869 [00:04<00:07, 71.00it/s]
1485
  38%|███▊ | 330/869 [00:04<00:07, 74.74it/s]
1486
  39%|███▉ | 339/869 [00:04<00:06, 76.71it/s]
1487
  40%|███▉ | 347/869 [00:04<00:07, 72.09it/s]
1488
  41%|████ | 355/869 [00:04<00:07, 72.99it/s]
1489
  42%|████▏ | 364/869 [00:04<00:06, 76.76it/s]
1490
  43%|████▎ | 373/869 [00:04<00:06, 74.39it/s]
1491
  44%|████▍ | 381/869 [00:04<00:06, 75.28it/s]
1492
  45%|████▍ | 389/869 [00:05<00:06, 70.47it/s]
1493
  46%|████▌ | 398/869 [00:05<00:06, 73.16it/s]
1494
  47%|████▋ | 407/869 [00:05<00:06, 75.53it/s]
1495
  48%|████▊ | 416/869 [00:05<00:05, 78.44it/s]
1496
  49%|████▉ | 424/869 [00:05<00:05, 78.67it/s]
1497
  50%|████▉ | 433/869 [00:05<00:05, 79.51it/s]
1498
  51%|█████ | 441/869 [00:05<00:05, 77.23it/s]
1499
  52%|█████▏ | 449/869 [00:05<00:05, 75.66it/s]
1500
  53%|█████▎ | 457/869 [00:05<00:05, 76.56it/s]
1501
  54%|█████▎ | 465/869 [00:06<00:05, 74.67it/s]
1502
  55%|█████▍ | 475/869 [00:06<00:04, 79.79it/s]
1503
  56%|█████▌ | 484/869 [00:06<00:04, 82.19it/s]
1504
  57%|█████▋ | 493/869 [00:06<00:04, 76.64it/s]
1505
  58%|█████▊ | 501/869 [00:06<00:04, 76.35it/s]
1506
  59%|█████▊ | 509/869 [00:06<00:04, 73.64it/s]
1507
  60%|█████▉ | 518/869 [00:06<00:04, 76.46it/s]
1508
  61%|██████ | 526/869 [00:06<00:04, 71.73it/s]
1509
  61%|██████▏ | 534/869 [00:06<00:04, 73.69it/s]
1510
  62%|██████▏ | 542/869 [00:07<00:04, 70.92it/s]
1511
  63%|██████▎ | 551/869 [00:07<00:04, 74.82it/s]
1512
  64%|██████▍ | 559/869 [00:07<00:04, 74.53it/s]
1513
  65%|██████▌ | 567/869 [00:07<00:04, 73.79it/s]
1514
  66%|██████▌ | 575/869 [00:07<00:03, 74.82it/s]
1515
  67%|██████▋ | 583/869 [00:07<00:03, 73.69it/s]
1516
  68%|██████▊ | 591/869 [00:07<00:03, 74.15it/s]
1517
  69%|██████▉ | 600/869 [00:07<00:03, 77.09it/s]
1518
  70%|███████ | 609/869 [00:07<00:03, 78.40it/s]
1519
  71%|███████ | 617/869 [00:08<00:03, 77.00it/s]
1520
  72%|███████▏ | 625/869 [00:08<00:03, 75.42it/s]
1521
  73%|███████▎ | 633/869 [00:08<00:03, 75.64it/s]
1522
  74%|███████▍ | 641/869 [00:08<00:03, 75.26it/s]
1523
  75%|███████▍ | 650/869 [00:08<00:02, 78.48it/s]
1524
  76%|███████▌ | 658/869 [00:08<00:02, 77.25it/s]
1525
  77%|███████▋ | 667/869 [00:08<00:02, 78.48it/s]
1526
  78%|███████▊ | 677/869 [00:08<00:02, 82.17it/s]
1527
  79%|███████▉ | 686/869 [00:09<00:02, 70.63it/s]
1528
  80%|███████▉ | 694/869 [00:09<00:02, 72.95it/s]
1529
  81%|████████ | 702/869 [00:09<00:02, 72.96it/s]
1530
  82%|████████▏ | 711/869 [00:09<00:02, 75.37it/s]
1531
  83%|████████▎ | 719/869 [00:09<00:01, 75.52it/s]
1532
  84%|████████▍ | 728/869 [00:09<00:01, 77.16it/s]
1533
  85%|████████▍ | 736/869 [00:09<00:01, 77.53it/s]
1534
  86%|████████▌ | 744/869 [00:09<00:01, 77.74it/s]
1535
  87%|████████▋ | 752/869 [00:09<00:01, 74.45it/s]
1536
  88%|████████▊ | 761/869 [00:09<00:01, 77.76it/s]
1537
  88%|████████▊ | 769/869 [00:10<00:01, 73.83it/s]
1538
  89%|████████▉ | 777/869 [00:10<00:01, 65.48it/s]
1539
  90%|█████████ | 785/869 [00:10<00:01, 67.59it/s]
1540
  91%|█████████▏| 794/869 [00:10<00:01, 71.22it/s]
1541
  92%|█████████▏| 802/869 [00:10<00:00, 73.53it/s]
1542
  93%|█████████▎| 810/869 [00:10<00:00, 74.76it/s]
1543
  94%|█████████▍| 818/869 [00:10<00:00, 75.19it/s]
1544
  95%|█████████▌| 826/869 [00:10<00:00, 75.37it/s]
1545
  96%|█████████▌| 834/869 [00:10<00:00, 74.91it/s]
1546
  97%|█████████▋| 843/869 [00:11<00:00, 77.97it/s]
1547
  98%|█████████▊| 852/869 [00:11<00:00, 79.15it/s]
1548
  99%|█████████▉| 860/869 [00:11<00:00, 74.82it/s]
1549
+ ***** eval metrics *****
1550
+ epoch = 10.0
1551
+ eval_accuracy = 0.9988
1552
+ eval_f1 = 0.9335
1553
+ eval_loss = 0.0076
1554
+ eval_precision = 0.925
1555
+ eval_recall = 0.9422
1556
+ eval_runtime = 0:00:15.18
1557
+ eval_samples = 6946
1558
+ eval_samples_per_second = 457.519
1559
+ eval_steps_per_second = 57.239
1560
+ 09/09/2024 13:27:37 - INFO - __main__ - *** Predict ***
1561
+ [INFO|trainer.py:811] 2024-09-09 13:27:37,532 >> The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
1562
+ [INFO|trainer.py:3819] 2024-09-09 13:27:37,534 >>
1563
+ ***** Running Prediction *****
1564
+ [INFO|trainer.py:3821] 2024-09-09 13:27:37,534 >> Num examples = 14715
1565
+ [INFO|trainer.py:3824] 2024-09-09 13:27:37,534 >> Batch size = 8
1566
+
1567
  0%| | 0/1840 [00:00<?, ?it/s]
1568
  1%| | 11/1840 [00:00<00:18, 98.85it/s]
1569
  1%| | 21/1840 [00:00<00:22, 79.68it/s]
1570
  2%|▏ | 30/1840 [00:00<00:22, 79.32it/s]
1571
  2%|▏ | 39/1840 [00:00<00:22, 79.99it/s]
1572
  3%|▎ | 48/1840 [00:00<00:22, 81.01it/s]
1573
  3%|▎ | 57/1840 [00:00<00:22, 80.70it/s]
1574
  4%|▎ | 66/1840 [00:00<00:22, 78.15it/s]
1575
  4%|▍ | 74/1840 [00:00<00:22, 78.25it/s]
1576
  5%|▍ | 83/1840 [00:01<00:21, 80.09it/s]
1577
  5%|▌ | 92/1840 [00:01<00:21, 79.74it/s]
1578
  6%|▌ | 102/1840 [00:01<00:20, 83.78it/s]
1579
  6%|▌ | 111/1840 [00:01<00:20, 84.96it/s]
1580
  7%|▋ | 120/1840 [00:01<00:20, 83.45it/s]
1581
  7%|▋ | 129/1840 [00:01<00:21, 79.72it/s]
1582
  8%|▊ | 138/1840 [00:01<00:20, 81.35it/s]
1583
  8%|▊ | 147/1840 [00:01<00:20, 82.23it/s]
1584
  8%|▊ | 156/1840 [00:01<00:21, 78.29it/s]
1585
  9%|▉ | 164/1840 [00:02<00:21, 76.51it/s]
1586
  9%|▉ | 173/1840 [00:02<00:21, 78.43it/s]
1587
  10%|▉ | 183/1840 [00:02<00:19, 82.86it/s]
1588
  10%|█ | 192/1840 [00:02<00:19, 84.17it/s]
1589
  11%|█ | 201/1840 [00:02<00:19, 82.31it/s]
1590
  11%|█▏ | 210/1840 [00:02<00:20, 79.83it/s]
1591
  12%|█▏ | 219/1840 [00:02<00:20, 80.69it/s]
1592
  12%|█▏ | 228/1840 [00:02<00:19, 82.07it/s]
1593
  13%|█▎ | 237/1840 [00:02<00:21, 76.28it/s]
1594
  13%|█▎ | 246/1840 [00:03<00:20, 78.24it/s]
1595
  14%|█▍ | 254/1840 [00:03<00:20, 78.47it/s]
1596
  14%|█▍ | 262/1840 [00:03<00:20, 78.54it/s]
1597
  15%|█▍ | 270/1840 [00:03<00:20, 78.47it/s]
1598
  15%|█▌ | 280/1840 [00:03<00:18, 83.71it/s]
1599
  16%|█▌ | 290/1840 [00:03<00:18, 83.65it/s]
1600
  16%|█▋ | 299/1840 [00:03<00:18, 85.16it/s]
1601
  17%|█▋ | 308/1840 [00:03<00:18, 83.08it/s]
1602
  17%|█▋ | 317/1840 [00:03<00:17, 84.68it/s]
1603
  18%|█▊ | 326/1840 [00:04<00:18, 83.75it/s]
1604
  18%|█▊ | 335/1840 [00:04<00:18, 82.34it/s]
1605
  19%|█▉ | 345/1840 [00:04<00:17, 84.84it/s]
1606
  19%|█▉ | 354/1840 [00:04<00:18, 80.58it/s]
1607
  20%|█▉ | 363/1840 [00:04<00:18, 80.45it/s]
1608
  20%|██ | 373/1840 [00:04<00:17, 82.94it/s]
1609
  21%|██ | 382/1840 [00:04<00:17, 84.75it/s]
1610
  21%|██▏ | 391/1840 [00:04<00:17, 83.12it/s]
1611
  22%|██▏ | 400/1840 [00:04<00:17, 81.50it/s]
1612
  22%|██▏ | 409/1840 [00:05<00:17, 82.12it/s]
1613
  23%|██▎ | 418/1840 [00:05<00:17, 79.95it/s]
1614
  23%|██▎ | 427/1840 [00:05<00:17, 80.56it/s]
1615
  24%|██▎ | 436/1840 [00:05<00:17, 79.79it/s]
1616
  24%|██▍ | 445/1840 [00:05<00:17, 80.57it/s]
1617
  25%|██▍ | 454/1840 [00:05<00:17, 80.30it/s]
1618
  25%|██▌ | 463/1840 [00:05<00:16, 82.64it/s]
1619
  26%|██▌ | 472/1840 [00:05<00:16, 84.64it/s]
1620
  26%|██▌ | 481/1840 [00:05<00:17, 76.17it/s]
1621
  27%|██▋ | 490/1840 [00:06<00:17, 79.25it/s]
1622
  27%|██▋ | 499/1840 [00:06<00:17, 76.61it/s]
1623
  28%|██▊ | 508/1840 [00:06<00:17, 77.81it/s]
1624
  28%|██▊ | 518/1840 [00:06<00:16, 81.49it/s]
1625
  29%|██▊ | 527/1840 [00:06<00:15, 82.26it/s]
1626
  29%|██▉ | 536/1840 [00:06<00:16, 81.29it/s]
1627
  30%|██▉ | 545/1840 [00:06<00:15, 81.42it/s]
1628
  30%|███ | 554/1840 [00:06<00:15, 82.78it/s]
1629
  31%|███ | 563/1840 [00:06<00:15, 80.10it/s]
1630
  31%|███ | 573/1840 [00:07<00:15, 83.39it/s]
1631
  32%|███▏ | 583/1840 [00:07<00:14, 85.46it/s]
1632
  32%|███▏ | 592/1840 [00:07<00:15, 82.90it/s]
1633
  33%|███▎ | 601/1840 [00:07<00:15, 81.01it/s]
1634
  33%|███▎ | 611/1840 [00:07<00:14, 82.55it/s]
1635
  34%|███▎ | 620/1840 [00:07<00:15, 79.88it/s]
1636
  34%|███▍ | 629/1840 [00:07<00:15, 79.21it/s]
1637
  35%|███▍ | 639/1840 [00:07<00:14, 83.45it/s]
1638
  35%|███▌ | 649/1840 [00:07<00:13, 85.57it/s]
1639
  36%|███▌ | 658/1840 [00:08<00:14, 81.64it/s]
1640
  36%|███▋ | 667/1840 [00:08<00:14, 80.85it/s]
1641
  37%|███▋ | 677/1840 [00:08<00:13, 84.92it/s]
1642
  37%|███▋ | 686/1840 [00:08<00:13, 82.87it/s]
1643
  38%|███▊ | 695/1840 [00:08<00:13, 83.04it/s]
1644
  38%|███▊ | 705/1840 [00:08<00:13, 86.04it/s]
1645
  39%|███▉ | 714/1840 [00:08<00:13, 84.27it/s]
1646
  39%|███▉ | 724/1840 [00:08<00:12, 87.27it/s]
1647
  40%|███▉ | 734/1840 [00:08<00:12, 89.45it/s]
1648
  40%|████ | 743/1840 [00:09<00:12, 89.01it/s]
1649
  41%|████ | 752/1840 [00:09<00:12, 86.83it/s]
1650
  41%|████▏ | 762/1840 [00:09<00:12, 89.06it/s]
1651
  42%|████▏ | 771/1840 [00:09<00:12, 87.52it/s]
1652
  42%|████▏ | 780/1840 [00:09<00:12, 87.85it/s]
1653
  43%|████▎ | 789/1840 [00:09<00:12, 84.43it/s]
1654
  43%|████▎ | 798/1840 [00:09<00:12, 84.40it/s]
1655
  44%|████▍ | 807/1840 [00:09<00:12, 82.54it/s]
1656
  44%|████▍ | 816/1840 [00:09<00:12, 83.63it/s]
1657
  45%|████▍ | 825/1840 [00:10<00:12, 84.00it/s]
1658
  45%|████▌ | 835/1840 [00:10<00:11, 87.17it/s]
1659
  46%|████▌ | 844/1840 [00:10<00:12, 82.78it/s]
1660
  46%|████▋ | 853/1840 [00:10<00:11, 84.72it/s]
1661
  47%|████▋ | 863/1840 [00:10<00:11, 86.79it/s]
1662
  47%|████▋ | 873/1840 [00:10<00:11, 86.72it/s]
1663
  48%|████▊ | 882/1840 [00:10<00:11, 82.75it/s]
1664
  48%|████▊ | 891/1840 [00:10<00:11, 82.52it/s]
1665
  49%|████▉ | 900/1840 [00:10<00:11, 81.40it/s]
1666
  49%|████▉ | 910/1840 [00:11<00:10, 85.07it/s]
1667
  50%|████▉ | 919/1840 [00:11<00:10, 86.09it/s]
1668
  50%|█████ | 929/1840 [00:11<00:10, 87.65it/s]
1669
  51%|█████ | 938/1840 [00:11<00:10, 84.08it/s]
1670
  51%|█████▏ | 947/1840 [00:11<00:10, 84.62it/s]
1671
  52%|█████▏ | 956/1840 [00:11<00:10, 81.28it/s]
1672
  52%|█████▏ | 965/1840 [00:11<00:10, 81.73it/s]
1673
  53%|█████▎ | 975/1840 [00:11<00:10, 84.56it/s]
1674
  53%|█████▎ | 984/1840 [00:11<00:10, 81.54it/s]
1675
  54%|█████▍ | 994/1840 [00:12<00:10, 84.11it/s]
1676
  55%|█████▍ | 1003/1840 [00:12<00:10, 82.91it/s]
1677
  55%|█████▌ | 1012/1840 [00:12<00:10, 81.54it/s]
1678
  55%|█████▌ | 1021/1840 [00:12<00:09, 82.96it/s]
1679
  56%|█████▌ | 1031/1840 [00:12<00:09, 86.38it/s]
1680
  57%|█████▋ | 1041/1840 [00:12<00:09, 86.05it/s]
1681
  57%|█████▋ | 1050/1840 [00:12<00:09, 85.53it/s]
1682
  58%|█████▊ | 1059/1840 [00:12<00:09, 84.24it/s]
1683
  58%|█████▊ | 1068/1840 [00:12<00:09, 83.89it/s]
1684
  59%|█████▊ | 1078/1840 [00:13<00:08, 86.49it/s]
1685
  59%|█████▉ | 1088/1840 [00:13<00:08, 88.79it/s]
1686
  60%|█████▉ | 1098/1840 [00:13<00:08, 90.45it/s]
1687
  60%|██████ | 1108/1840 [00:13<00:08, 90.80it/s]
1688
  61%|██████ | 1118/1840 [00:13<00:08, 86.64it/s]
1689
  61%|██████▏ | 1127/1840 [00:13<00:08, 85.05it/s]
1690
  62%|██████▏ | 1137/1840 [00:13<00:08, 86.61it/s]
1691
  62%|██████▏ | 1146/1840 [00:13<00:07, 87.34it/s]
1692
  63%|██████▎ | 1155/1840 [00:13<00:07, 87.26it/s]
1693
  63%|██████▎ | 1165/1840 [00:14<00:07, 88.13it/s]
1694
  64%|██████▍ | 1174/1840 [00:14<00:07, 83.58it/s]
1695
  64%|██████▍ | 1183/1840 [00:14<00:07, 84.91it/s]
1696
  65%|██████▍ | 1192/1840 [00:14<00:07, 81.30it/s]
1697
  65%|██████▌ | 1202/1840 [00:14<00:07, 85.01it/s]
1698
  66%|██████▌ | 1211/1840 [00:14<00:07, 84.77it/s]
1699
  66%|██████▋ | 1220/1840 [00:14<00:07, 80.78it/s]
1700
  67%|██████▋ | 1229/1840 [00:14<00:07, 77.99it/s]
1701
  67%|██████▋ | 1239/1840 [00:14<00:07, 82.43it/s]
1702
  68%|██████▊ | 1249/1840 [00:15<00:07, 84.02it/s]
1703
  68%|██████▊ | 1259/1840 [00:15<00:06, 86.05it/s]
1704
  69%|██████▉ | 1268/1840 [00:15<00:07, 81.33it/s]
1705
  69%|██████▉ | 1277/1840 [00:15<00:06, 81.61it/s]
1706
  70%|██████▉ | 1286/1840 [00:15<00:06, 83.73it/s]
1707
  70%|███████ | 1295/1840 [00:15<00:06, 85.09it/s]
1708
  71%|███████ | 1305/1840 [00:15<00:06, 87.44it/s]
1709
  71%|███████▏ | 1315/1840 [00:15<00:05, 89.24it/s]
1710
  72%|███████▏ | 1325/1840 [00:15<00:05, 90.82it/s]
1711
  73%|███████▎ | 1335/1840 [00:16<00:05, 89.40it/s]
1712
  73%|███████▎ | 1344/1840 [00:16<00:05, 88.30it/s]
1713
  74%|███████▎ | 1353/1840 [00:16<00:05, 86.55it/s]
1714
  74%|███████▍ | 1362/1840 [00:16<00:05, 85.78it/s]
1715
  75%|███████▍ | 1372/1840 [00:16<00:05, 87.53it/s]
1716
  75%|███████▌ | 1381/1840 [00:16<00:05, 83.66it/s]
1717
  76%|███████▌ | 1391/1840 [00:16<00:05, 86.29it/s]
1718
  76%|███████▌ | 1400/1840 [00:16<00:05, 84.99it/s]
1719
  77%|███████▋ | 1410/1840 [00:16<00:04, 87.43it/s]
1720
  77%|███████▋ | 1420/1840 [00:17<00:04, 89.10it/s]
1721
  78%|███████▊ | 1429/1840 [00:17<00:04, 88.03it/s]
1722
  78%|███████▊ | 1438/1840 [00:17<00:04, 83.40it/s]
1723
  79%|███████▊ | 1447/1840 [00:17<00:04, 80.09it/s]
1724
  79%|███████▉ | 1456/1840 [00:17<00:04, 79.92it/s]
1725
  80%|███████▉ | 1466/1840 [00:17<00:04, 82.65it/s]
1726
  80%|████████ | 1475/1840 [00:17<00:04, 83.62it/s]
1727
  81%|████████ | 1484/1840 [00:17<00:04, 78.30it/s]
1728
  81%|████████ | 1493/1840 [00:17<00:04, 80.11it/s]
1729
  82%|████████▏ | 1502/1840 [00:18<00:04, 75.31it/s]
1730
  82%|████████▏ | 1511/1840 [00:18<00:04, 75.48it/s]
1731
  83%|████████▎ | 1520/1840 [00:18<00:04, 77.35it/s]
1732
  83%|████████▎ | 1529/1840 [00:18<00:03, 79.48it/s]
1733
  84%|████████▎ | 1538/1840 [00:18<00:03, 77.28it/s]
1734
  84%|████████▍ | 1547/1840 [00:18<00:03, 78.65it/s]
1735
  85%|████████▍ | 1556/1840 [00:18<00:03, 79.70it/s]
1736
  85%|████████▌ | 1565/1840 [00:18<00:03, 79.11it/s]
1737
  86%|████████▌ | 1574/1840 [00:18<00:03, 79.32it/s]
1738
  86%|████████▌ | 1582/1840 [00:19<00:03, 76.10it/s]
1739
  86%|████████▋ | 1591/1840 [00:19<00:03, 77.80it/s]
1740
  87%|████████▋ | 1599/1840 [00:19<00:03, 76.77it/s]
1741
  87%|████████▋ | 1608/1840 [00:19<00:02, 78.00it/s]
1742
  88%|████████▊ | 1617/1840 [00:19<00:02, 80.43it/s]
1743
  88%|████████▊ | 1626/1840 [00:19<00:02, 81.36it/s]
1744
  89%|████████▉ | 1635/1840 [00:19<00:02, 71.94it/s]
1745
  89%|████████▉ | 1643/1840 [00:19<00:02, 73.05it/s]
1746
  90%|████████▉ | 1652/1840 [00:20<00:02, 75.07it/s]
1747
  90%|█████████ | 1661/1840 [00:20<00:02, 78.13it/s]
1748
  91%|█████████ | 1670/1840 [00:20<00:02, 78.46it/s]
1749
  91%|█████████ | 1678/1840 [00:20<00:02, 78.03it/s]
1750
  92%|█████████▏| 1687/1840 [00:20<00:01, 79.05it/s]
1751
  92%|█████████▏| 1697/1840 [00:20<00:01, 82.97it/s]
1752
  93%|█████████▎| 1706/1840 [00:20<00:01, 80.42it/s]
1753
  93%|██���██████▎| 1715/1840 [00:20<00:01, 78.43it/s]
1754
  94%|█████████▎| 1724/1840 [00:20<00:01, 80.51it/s]
1755
  94%|█████████▍| 1733/1840 [00:21<00:01, 77.32it/s]
1756
  95%|█████████▍| 1742/1840 [00:21<00:01, 80.56it/s]
1757
  95%|█████████▌| 1752/1840 [00:21<00:01, 83.60it/s]
1758
  96%|█████████▌| 1761/1840 [00:21<00:00, 84.69it/s]
1759
  96%|█████████▌| 1770/1840 [00:21<00:00, 82.30it/s]
1760
  97%|█████████▋| 1779/1840 [00:21<00:00, 82.74it/s]
1761
  97%|█████████▋| 1788/1840 [00:21<00:00, 81.59it/s]
1762
  98%|█████████▊| 1797/1840 [00:21<00:00, 83.44it/s]
1763
  98%|█████████▊| 1806/1840 [00:21<00:00, 79.67it/s]
1764
  99%|█████████▊| 1815/1840 [00:22<00:00, 77.02it/s]
1765
  99%|█████████▉| 1824/1840 [00:22<00:00, 79.80it/s]
1766
+ [INFO|trainer.py:3503] 2024-09-09 13:28:06,818 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1767
+ [INFO|configuration_utils.py:472] 2024-09-09 13:28:06,820 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1768
+ [INFO|modeling_utils.py:2799] 2024-09-09 13:28:08,001 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1769
+ [INFO|tokenization_utils_base.py:2684] 2024-09-09 13:28:08,002 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1770
+ [INFO|tokenization_utils_base.py:2693] 2024-09-09 13:28:08,002 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1771
+ ***** predict metrics *****
1772
+ predict_accuracy = 0.9987
1773
+ predict_f1 = 0.9203
1774
+ predict_loss = 0.0078
1775
+ predict_precision = 0.8938
1776
+ predict_recall = 0.9483
1777
+ predict_runtime = 0:00:28.74
1778
+ predict_samples_per_second = 511.904
1779
+ predict_steps_per_second = 64.01
1780
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 9.967845659163988,
3
- "total_flos": 4644619911314910.0,
4
- "train_loss": 0.050868147861573,
5
- "train_runtime": 855.1929,
6
- "train_samples": 9929,
7
- "train_samples_per_second": 116.102,
8
- "train_steps_per_second": 1.812
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 1.394810359803495e+16,
4
+ "train_loss": 0.0028968164414402532,
5
+ "train_runtime": 2196.5741,
6
+ "train_samples": 32447,
7
+ "train_samples_per_second": 147.716,
8
+ "train_steps_per_second": 2.308
9
  }
trainer_state.json CHANGED
@@ -1,166 +1,215 @@
1
  {
2
- "best_metric": 0.6946045049764275,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1088",
4
- "epoch": 9.967845659163988,
5
  "eval_steps": 500,
6
- "global_step": 1550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.9967845659163987,
13
- "eval_accuracy": 0.9435978055118868,
14
- "eval_f1": 0.593056643174106,
15
- "eval_loss": 0.14849668741226196,
16
- "eval_precision": 0.5668662674650699,
17
- "eval_recall": 0.6217843459222769,
18
- "eval_runtime": 5.8633,
19
- "eval_samples_per_second": 429.618,
20
- "eval_steps_per_second": 53.724,
21
- "step": 155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  },
23
  {
24
  "epoch": 2.0,
25
- "eval_accuracy": 0.9408546953704001,
26
- "eval_f1": 0.6299060919817,
27
- "eval_loss": 0.16094118356704712,
28
- "eval_precision": 0.5623387790197765,
29
- "eval_recall": 0.715927750410509,
30
- "eval_runtime": 5.9155,
31
- "eval_samples_per_second": 425.831,
32
- "eval_steps_per_second": 53.25,
33
- "step": 311
34
- },
35
- {
36
- "epoch": 2.996784565916399,
37
- "eval_accuracy": 0.9486669447207161,
38
- "eval_f1": 0.6676790685902303,
39
- "eval_loss": 0.1634686440229416,
40
- "eval_precision": 0.6209981167608286,
41
- "eval_recall": 0.7219485495347564,
42
- "eval_runtime": 5.9009,
43
- "eval_samples_per_second": 426.884,
44
- "eval_steps_per_second": 53.382,
45
- "step": 466
46
- },
47
- {
48
- "epoch": 3.215434083601286,
49
- "grad_norm": 0.9288749098777771,
50
- "learning_rate": 3.387096774193548e-05,
51
- "loss": 0.1246,
52
- "step": 500
 
 
 
 
 
 
 
53
  },
54
  {
55
  "epoch": 4.0,
56
- "eval_accuracy": 0.9492604831723828,
57
- "eval_f1": 0.6711919630735812,
58
- "eval_loss": 0.20466655492782593,
59
- "eval_precision": 0.665948275862069,
60
- "eval_recall": 0.6765188834154351,
61
- "eval_runtime": 5.9155,
62
- "eval_samples_per_second": 425.831,
63
- "eval_steps_per_second": 53.25,
64
- "step": 622
65
- },
66
- {
67
- "epoch": 4.996784565916399,
68
- "eval_accuracy": 0.9479771567904007,
69
- "eval_f1": 0.6827731092436975,
70
- "eval_loss": 0.2133806049823761,
71
- "eval_precision": 0.6562342251388188,
72
- "eval_recall": 0.7115489874110563,
73
- "eval_runtime": 5.8759,
74
- "eval_samples_per_second": 428.701,
75
- "eval_steps_per_second": 53.609,
76
- "step": 777
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  },
78
  {
79
  "epoch": 6.0,
80
- "eval_accuracy": 0.949388815810581,
81
- "eval_f1": 0.6795913020696882,
82
- "eval_loss": 0.2258971482515335,
83
- "eval_precision": 0.6517587939698493,
84
- "eval_recall": 0.7099069512862616,
85
- "eval_runtime": 5.9106,
86
- "eval_samples_per_second": 426.187,
87
- "eval_steps_per_second": 53.294,
88
- "step": 933
89
- },
90
- {
91
- "epoch": 6.430868167202572,
92
- "grad_norm": 0.18494442105293274,
93
- "learning_rate": 1.774193548387097e-05,
94
- "loss": 0.0242,
95
- "step": 1000
96
  },
97
  {
98
- "epoch": 6.996784565916399,
99
- "eval_accuracy": 0.9496615226667522,
100
- "eval_f1": 0.6946045049764275,
101
- "eval_loss": 0.24502409994602203,
102
- "eval_precision": 0.6659969864389754,
103
- "eval_recall": 0.7257799671592775,
104
- "eval_runtime": 5.8918,
105
- "eval_samples_per_second": 427.542,
106
- "eval_steps_per_second": 53.464,
107
- "step": 1088
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  },
109
  {
110
  "epoch": 8.0,
111
- "eval_accuracy": 0.9490679842150855,
112
- "eval_f1": 0.6841015018125324,
113
- "eval_loss": 0.26501980423927307,
114
- "eval_precision": 0.6491400491400492,
115
- "eval_recall": 0.7230432402846196,
116
- "eval_runtime": 5.9184,
117
- "eval_samples_per_second": 425.624,
118
- "eval_steps_per_second": 53.224,
119
- "step": 1244
120
- },
121
- {
122
- "epoch": 8.996784565916398,
123
- "eval_accuracy": 0.9497577721454009,
124
- "eval_f1": 0.687797147385103,
125
- "eval_loss": 0.27453720569610596,
126
- "eval_precision": 0.664624808575804,
127
- "eval_recall": 0.7126436781609196,
128
- "eval_runtime": 5.9283,
129
- "eval_samples_per_second": 424.913,
130
- "eval_steps_per_second": 53.135,
131
- "step": 1399
132
- },
133
- {
134
- "epoch": 9.646302250803858,
135
- "grad_norm": 0.35425594449043274,
136
- "learning_rate": 1.6129032258064516e-06,
137
- "loss": 0.0083,
138
- "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  },
140
  {
141
- "epoch": 9.967845659163988,
142
- "eval_accuracy": 0.950255061118419,
143
- "eval_f1": 0.6896008403361344,
144
- "eval_loss": 0.27744925022125244,
145
- "eval_precision": 0.662796567390207,
146
- "eval_recall": 0.7186644772851669,
147
- "eval_runtime": 6.1493,
148
- "eval_samples_per_second": 409.643,
149
- "eval_steps_per_second": 51.226,
150
- "step": 1550
151
- },
152
- {
153
- "epoch": 9.967845659163988,
154
- "step": 1550,
155
- "total_flos": 4644619911314910.0,
156
- "train_loss": 0.050868147861573,
157
- "train_runtime": 855.1929,
158
- "train_samples_per_second": 116.102,
159
- "train_steps_per_second": 1.812
160
  }
161
  ],
162
  "logging_steps": 500,
163
- "max_steps": 1550,
164
  "num_input_tokens_seen": 0,
165
  "num_train_epochs": 10,
166
  "save_steps": 500,
@@ -176,7 +225,7 @@
176
  "attributes": {}
177
  }
178
  },
179
- "total_flos": 4644619911314910.0,
180
  "train_batch_size": 32,
181
  "trial_name": null,
182
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9335180055401663,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4056",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 5070,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.9861932938856016,
13
+ "grad_norm": 0.16175590455532074,
14
+ "learning_rate": 4.5069033530571994e-05,
15
+ "loss": 0.0183,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.9985418469009014,
21
+ "eval_f1": 0.9170464904284412,
22
+ "eval_loss": 0.005470008589327335,
23
+ "eval_precision": 0.8974130240856378,
24
+ "eval_recall": 0.9375582479030755,
25
+ "eval_runtime": 15.2218,
26
+ "eval_samples_per_second": 456.319,
27
+ "eval_steps_per_second": 57.089,
28
+ "step": 507
29
+ },
30
+ {
31
+ "epoch": 1.972386587771203,
32
+ "grad_norm": 0.13494741916656494,
33
+ "learning_rate": 4.0138067061143986e-05,
34
+ "loss": 0.0043,
35
+ "step": 1000
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.9985976613735942,
40
+ "eval_f1": 0.9208103130755064,
41
+ "eval_loss": 0.005941031500697136,
42
+ "eval_precision": 0.9099181073703366,
43
+ "eval_recall": 0.9319664492078286,
44
+ "eval_runtime": 15.129,
45
+ "eval_samples_per_second": 459.117,
46
+ "eval_steps_per_second": 57.439,
47
+ "step": 1014
48
+ },
49
+ {
50
+ "epoch": 2.9585798816568047,
51
+ "grad_norm": 0.14869354665279388,
52
+ "learning_rate": 3.520710059171598e-05,
53
+ "loss": 0.0022,
54
+ "step": 1500
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.998513939664555,
59
+ "eval_f1": 0.9155963302752294,
60
+ "eval_loss": 0.005716267507523298,
61
+ "eval_precision": 0.9015356820234869,
62
+ "eval_recall": 0.9301025163094129,
63
+ "eval_runtime": 15.1542,
64
+ "eval_samples_per_second": 458.356,
65
+ "eval_steps_per_second": 57.344,
66
+ "step": 1521
67
+ },
68
+ {
69
+ "epoch": 3.9447731755424065,
70
+ "grad_norm": 0.15148845314979553,
71
+ "learning_rate": 3.027613412228797e-05,
72
+ "loss": 0.0018,
73
+ "step": 2000
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.9985767309463344,
78
+ "eval_f1": 0.9227166276346604,
79
+ "eval_loss": 0.0072021945379674435,
80
+ "eval_precision": 0.9274952919020716,
81
+ "eval_recall": 0.9179869524697111,
82
+ "eval_runtime": 15.3453,
83
+ "eval_samples_per_second": 452.646,
84
+ "eval_steps_per_second": 56.63,
85
+ "step": 2028
86
+ },
87
+ {
88
+ "epoch": 4.930966469428008,
89
+ "grad_norm": 0.17791983485221863,
90
+ "learning_rate": 2.5345167652859964e-05,
91
+ "loss": 0.0009,
92
+ "step": 2500
93
+ },
94
+ {
95
+ "epoch": 5.0,
96
+ "eval_accuracy": 0.9986604526553735,
97
+ "eval_f1": 0.9215236346948141,
98
+ "eval_loss": 0.0063728527165949345,
99
+ "eval_precision": 0.9077757685352622,
100
+ "eval_recall": 0.9356943150046598,
101
+ "eval_runtime": 15.1945,
102
+ "eval_samples_per_second": 457.14,
103
+ "eval_steps_per_second": 57.192,
104
+ "step": 2535
105
+ },
106
+ {
107
+ "epoch": 5.9171597633136095,
108
+ "grad_norm": 0.027111981064081192,
109
+ "learning_rate": 2.0414201183431952e-05,
110
+ "loss": 0.0007,
111
+ "step": 3000
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.9987302207462395,
116
+ "eval_f1": 0.9274826789838336,
117
+ "eval_loss": 0.006401057820767164,
118
+ "eval_precision": 0.9194139194139194,
119
+ "eval_recall": 0.9356943150046598,
120
+ "eval_runtime": 15.0971,
121
+ "eval_samples_per_second": 460.088,
122
+ "eval_steps_per_second": 57.561,
123
+ "step": 3042
 
 
 
 
 
 
 
124
  },
125
  {
126
+ "epoch": 6.903353057199211,
127
+ "grad_norm": 0.0044996230863034725,
128
+ "learning_rate": 1.5483234714003947e-05,
129
+ "loss": 0.0004,
130
+ "step": 3500
131
+ },
132
+ {
133
+ "epoch": 7.0,
134
+ "eval_accuracy": 0.9987651047916725,
135
+ "eval_f1": 0.9332096474953618,
136
+ "eval_loss": 0.00720419455319643,
137
+ "eval_precision": 0.9289012003693444,
138
+ "eval_recall": 0.9375582479030755,
139
+ "eval_runtime": 15.1686,
140
+ "eval_samples_per_second": 457.919,
141
+ "eval_steps_per_second": 57.289,
142
+ "step": 3549
143
+ },
144
+ {
145
+ "epoch": 7.889546351084813,
146
+ "grad_norm": 0.0005134351667948067,
147
+ "learning_rate": 1.0552268244575937e-05,
148
+ "loss": 0.0004,
149
+ "step": 4000
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.998772081600759,
154
+ "eval_f1": 0.9335180055401663,
155
+ "eval_loss": 0.007628325838595629,
156
+ "eval_precision": 0.9249771271729186,
157
+ "eval_recall": 0.9422180801491147,
158
+ "eval_runtime": 15.1289,
159
+ "eval_samples_per_second": 459.122,
160
+ "eval_steps_per_second": 57.44,
161
+ "step": 4056
162
+ },
163
+ {
164
+ "epoch": 8.875739644970414,
165
+ "grad_norm": 0.0011633747490122914,
166
+ "learning_rate": 5.621301775147929e-06,
167
+ "loss": 0.0003,
168
+ "step": 4500
169
+ },
170
+ {
171
+ "epoch": 9.0,
172
+ "eval_accuracy": 0.9987162671280663,
173
+ "eval_f1": 0.9262672811059908,
174
+ "eval_loss": 0.00773056922480464,
175
+ "eval_precision": 0.9161349134001823,
176
+ "eval_recall": 0.9366262814538676,
177
+ "eval_runtime": 15.3347,
178
+ "eval_samples_per_second": 452.96,
179
+ "eval_steps_per_second": 56.669,
180
+ "step": 4563
181
+ },
182
+ {
183
+ "epoch": 9.861932938856016,
184
+ "grad_norm": 0.001042825635522604,
185
+ "learning_rate": 6.903353057199211e-07,
186
+ "loss": 0.0002,
187
+ "step": 5000
188
+ },
189
+ {
190
+ "epoch": 10.0,
191
+ "eval_accuracy": 0.9987511511734993,
192
+ "eval_f1": 0.92797783933518,
193
+ "eval_loss": 0.00769586768001318,
194
+ "eval_precision": 0.9194876486733761,
195
+ "eval_recall": 0.9366262814538676,
196
+ "eval_runtime": 15.2047,
197
+ "eval_samples_per_second": 456.832,
198
+ "eval_steps_per_second": 57.153,
199
+ "step": 5070
200
  },
201
  {
202
+ "epoch": 10.0,
203
+ "step": 5070,
204
+ "total_flos": 1.394810359803495e+16,
205
+ "train_loss": 0.0028968164414402532,
206
+ "train_runtime": 2196.5741,
207
+ "train_samples_per_second": 147.716,
208
+ "train_steps_per_second": 2.308
 
 
 
 
 
 
 
 
 
 
 
 
209
  }
210
  ],
211
  "logging_steps": 500,
212
+ "max_steps": 5070,
213
  "num_input_tokens_seen": 0,
214
  "num_train_epochs": 10,
215
  "save_steps": 500,
 
225
  "attributes": {}
226
  }
227
  },
228
+ "total_flos": 1.394810359803495e+16,
229
  "train_batch_size": 32,
230
  "trial_name": null,
231
  "trial_params": null