2022-02-04 12:53:17,467 ----------------------------------------------------------------------------------------------------
2022-02-04 12:53:17,468 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): CamembertModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(32005, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): RobertaPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=51, bias=True)
  (beta): 1.0
  (weights): None
  (weight_tensor): None
)"
2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
2022-02-04 12:53:17,506 Corpus: "Corpus: 5642 train + 195 dev + 649 test sentences"
2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
2022-02-04 12:53:17,506 Parameters:
2022-02-04 12:53:17,506 - learning_rate: "5e-06"
2022-02-04 12:53:17,506 - mini_batch_size: "32"
2022-02-04 12:53:17,506 - patience: "3"
2022-02-04 12:53:17,506 - anneal_factor: "0.5"
2022-02-04 12:53:17,506 - max_epochs: "10"
2022-02-04 12:53:17,506 - shuffle: "True"
2022-02-04 12:53:17,506 - train_with_dev: "False"
2022-02-04 12:53:17,506 - batch_growth_annealing: "False"
2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
2022-02-04 12:53:17,506 Model training base path: "resources/taggers/pos-camembert"
2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
2022-02-04 12:53:17,511 Device: cuda:0
2022-02-04 12:53:17,511 ----------------------------------------------------------------------------------------------------
2022-02-04 12:53:17,511 Embeddings storage mode: none
2022-02-04 12:53:17,513 ----------------------------------------------------------------------------------------------------
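
[Editor's note: a plausible training call for the hyperparameters logged above (learning_rate 5e-06, mini_batch_size 32, max_epochs 10, embeddings storage mode none), continuing the sketch after the model printout. Using ModelTrainer.fine_tune is an inference from the warmup-then-linear-decay lr column below, not something the log states explicitly; patience and anneal_factor are train() defaults that Flair logs regardless.]

from flair.trainers import ModelTrainer

# `tagger` and `corpus` as constructed in the sketch above
trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "resources/taggers/pos-camembert",  # base path from the log
    learning_rate=5e-6,
    mini_batch_size=32,
    max_epochs=10,
    embeddings_storage_mode="none",
)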
2022-02-04 12:53:38,315 epoch 1 - iter 17/177 - loss 3.96872255 - samples/sec: 26.15 - lr: 0.000000
2022-02-04 12:53:54,561 epoch 1 - iter 34/177 - loss 3.96629180 - samples/sec: 33.49 - lr: 0.000001
2022-02-04 12:54:11,140 epoch 1 - iter 51/177 - loss 3.95985736 - samples/sec: 32.82 - lr: 0.000001
2022-02-04 12:54:27,471 epoch 1 - iter 68/177 - loss 3.95248851 - samples/sec: 33.31 - lr: 0.000002
2022-02-04 12:54:44,574 epoch 1 - iter 85/177 - loss 3.94223845 - samples/sec: 31.81 - lr: 0.000002
2022-02-04 12:54:59,811 epoch 1 - iter 102/177 - loss 3.93034373 - samples/sec: 35.71 - lr: 0.000003
2022-02-04 12:55:17,140 epoch 1 - iter 119/177 - loss 3.91667895 - samples/sec: 31.39 - lr: 0.000003
2022-02-04 12:55:33,245 epoch 1 - iter 136/177 - loss 3.90088222 - samples/sec: 33.78 - lr: 0.000004
2022-02-04 12:55:48,743 epoch 1 - iter 153/177 - loss 3.87766994 - samples/sec: 35.11 - lr: 0.000004
2022-02-04 12:56:06,269 epoch 1 - iter 170/177 - loss 3.84880099 - samples/sec: 31.04 - lr: 0.000005
2022-02-04 12:56:12,033 ----------------------------------------------------------------------------------------------------
2022-02-04 12:56:12,033 EPOCH 1 done: loss 3.8419 - lr 0.0000050
2022-02-04 12:56:18,260 DEV : loss 3.509683847427368 - f1-score (micro avg) 0.3053
2022-02-04 12:56:18,262 BAD EPOCHS (no improvement): 4
2022-02-04 12:56:18,285 ----------------------------------------------------------------------------------------------------
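
[Editor's note: the lr column rises from 0 to the peak 5e-06 across epoch 1 (177 of the 1770 total steps, i.e. a 10% warmup fraction) and then decays linearly to 0 by the end of epoch 10; the end-of-epoch values below (0.0000044, 0.0000039, ..., 0.0000000) match this closed form.]

# Sketch of the schedule implied by the logged lr values:
# linear warmup over the first 10% of steps, then linear decay to zero.
PEAK_LR = 5e-6
TOTAL_STEPS = 177 * 10  # 177 batches/epoch for 10 epochs
WARMUP_STEPS = 177      # one epoch, i.e. a 0.1 warmup fraction

def lr_at(step: int) -> float:
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    return PEAK_LR * (TOTAL_STEPS - step) / (TOTAL_STEPS - WARMUP_STEPS)

print(lr_at(2 * 177))   # ~4.44e-06, the lr logged at the end of epoch 2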
2022-02-04 12:56:35,575 epoch 2 - iter 17/177 - loss 3.54034313 - samples/sec: 31.47 - lr: 0.000005
2022-02-04 12:56:52,475 epoch 2 - iter 34/177 - loss 3.50300407 - samples/sec: 32.19 - lr: 0.000005
2022-02-04 12:57:09,058 epoch 2 - iter 51/177 - loss 3.46864739 - samples/sec: 32.81 - lr: 0.000005
2022-02-04 12:57:25,624 epoch 2 - iter 68/177 - loss 3.43125430 - samples/sec: 32.84 - lr: 0.000005
2022-02-04 12:57:42,941 epoch 2 - iter 85/177 - loss 3.39270879 - samples/sec: 31.42 - lr: 0.000005
2022-02-04 12:57:59,153 epoch 2 - iter 102/177 - loss 3.35791389 - samples/sec: 33.56 - lr: 0.000005
2022-02-04 12:58:16,864 epoch 2 - iter 119/177 - loss 3.32573531 - samples/sec: 30.72 - lr: 0.000005
2022-02-04 12:58:34,354 epoch 2 - iter 136/177 - loss 3.29370429 - samples/sec: 31.11 - lr: 0.000005
2022-02-04 12:58:51,116 epoch 2 - iter 153/177 - loss 3.26367901 - samples/sec: 32.46 - lr: 0.000005
2022-02-04 12:59:08,117 epoch 2 - iter 170/177 - loss 3.23382669 - samples/sec: 32.00 - lr: 0.000004
2022-02-04 12:59:15,072 ----------------------------------------------------------------------------------------------------
2022-02-04 12:59:15,074 EPOCH 2 done: loss 3.2228 - lr 0.0000044
2022-02-04 12:59:20,452 DEV : loss 2.775869846343994 - f1-score (micro avg) 0.6141
2022-02-04 12:59:20,455 BAD EPOCHS (no improvement): 4
2022-02-04 12:59:20,455 ----------------------------------------------------------------------------------------------------
2022-02-04 12:59:38,069 epoch 3 - iter 17/177 - loss 2.92343717 - samples/sec: 30.89 - lr: 0.000004
2022-02-04 12:59:54,400 epoch 3 - iter 34/177 - loss 2.90201388 - samples/sec: 33.32 - lr: 0.000004
2022-02-04 13:00:12,150 epoch 3 - iter 51/177 - loss 2.88495451 - samples/sec: 30.65 - lr: 0.000004
2022-02-04 13:00:28,960 epoch 3 - iter 68/177 - loss 2.86475060 - samples/sec: 32.37 - lr: 0.000004
2022-02-04 13:00:47,016 epoch 3 - iter 85/177 - loss 2.84779479 - samples/sec: 30.13 - lr: 0.000004
2022-02-04 13:01:03,811 epoch 3 - iter 102/177 - loss 2.83018073 - samples/sec: 32.40 - lr: 0.000004
2022-02-04 13:01:19,598 epoch 3 - iter 119/177 - loss 2.81577196 - samples/sec: 34.47 - lr: 0.000004
2022-02-04 13:01:36,746 epoch 3 - iter 136/177 - loss 2.80310518 - samples/sec: 31.73 - lr: 0.000004
2022-02-04 13:01:53,532 epoch 3 - iter 153/177 - loss 2.79075673 - samples/sec: 32.41 - lr: 0.000004
2022-02-04 13:02:11,809 epoch 3 - iter 170/177 - loss 2.77624103 - samples/sec: 29.77 - lr: 0.000004
2022-02-04 13:02:17,990 ----------------------------------------------------------------------------------------------------
2022-02-04 13:02:17,991 EPOCH 3 done: loss 2.7701 - lr 0.0000039
2022-02-04 13:02:23,777 DEV : loss 2.410931348800659 - f1-score (micro avg) 0.819
2022-02-04 13:02:23,780 BAD EPOCHS (no improvement): 4
2022-02-04 13:02:23,781 ----------------------------------------------------------------------------------------------------
2022-02-04 13:02:41,231 epoch 4 - iter 17/177 - loss 2.60188784 - samples/sec: 31.18 - lr: 0.000004
2022-02-04 13:02:58,635 epoch 4 - iter 34/177 - loss 2.59095213 - samples/sec: 31.26 - lr: 0.000004
2022-02-04 13:03:15,040 epoch 4 - iter 51/177 - loss 2.58502577 - samples/sec: 33.17 - lr: 0.000004
2022-02-04 13:03:32,700 epoch 4 - iter 68/177 - loss 2.57149732 - samples/sec: 30.81 - lr: 0.000004
2022-02-04 13:03:49,889 epoch 4 - iter 85/177 - loss 2.55924475 - samples/sec: 31.65 - lr: 0.000004
2022-02-04 13:04:07,257 epoch 4 - iter 102/177 - loss 2.54972860 - samples/sec: 31.33 - lr: 0.000004
2022-02-04 13:04:24,141 epoch 4 - iter 119/177 - loss 2.54070048 - samples/sec: 32.23 - lr: 0.000004
2022-02-04 13:04:40,320 epoch 4 - iter 136/177 - loss 2.53210863 - samples/sec: 33.69 - lr: 0.000003
2022-02-04 13:04:57,281 epoch 4 - iter 153/177 - loss 2.52441237 - samples/sec: 32.08 - lr: 0.000003
2022-02-04 13:05:15,246 epoch 4 - iter 170/177 - loss 2.51520228 - samples/sec: 30.29 - lr: 0.000003
2022-02-04 13:05:21,452 ----------------------------------------------------------------------------------------------------
2022-02-04 13:05:21,458 EPOCH 4 done: loss 2.5123 - lr 0.0000033
2022-02-04 13:05:27,295 DEV : loss 2.1908302307128906 - f1-score (micro avg) 0.8605
2022-02-04 13:05:27,310 BAD EPOCHS (no improvement): 4
2022-02-04 13:05:27,310 ----------------------------------------------------------------------------------------------------
2022-02-04 13:05:44,024 epoch 5 - iter 17/177 - loss 2.39887737 - samples/sec: 32.55 - lr: 0.000003
2022-02-04 13:06:01,687 epoch 5 - iter 34/177 - loss 2.39948538 - samples/sec: 30.80 - lr: 0.000003
2022-02-04 13:06:19,664 epoch 5 - iter 51/177 - loss 2.40078878 - samples/sec: 30.29 - lr: 0.000003
2022-02-04 13:06:36,241 epoch 5 - iter 68/177 - loss 2.39524823 - samples/sec: 32.93 - lr: 0.000003
2022-02-04 13:06:52,683 epoch 5 - iter 85/177 - loss 2.38764769 - samples/sec: 33.17 - lr: 0.000003
2022-02-04 13:07:09,718 epoch 5 - iter 102/177 - loss 2.38104055 - samples/sec: 31.94 - lr: 0.000003
2022-02-04 13:07:26,578 epoch 5 - iter 119/177 - loss 2.37384530 - samples/sec: 32.29 - lr: 0.000003
2022-02-04 13:07:42,599 epoch 5 - iter 136/177 - loss 2.36823710 - samples/sec: 33.96 - lr: 0.000003
2022-02-04 13:08:00,031 epoch 5 - iter 153/177 - loss 2.36030726 - samples/sec: 31.25 - lr: 0.000003
2022-02-04 13:08:17,779 epoch 5 - iter 170/177 - loss 2.35368343 - samples/sec: 30.72 - lr: 0.000003
2022-02-04 13:08:24,110 ----------------------------------------------------------------------------------------------------
2022-02-04 13:08:24,111 EPOCH 5 done: loss 2.3509 - lr 0.0000028
2022-02-04 13:08:30,298 DEV : loss 2.0516607761383057 - f1-score (micro avg) 0.8737
2022-02-04 13:08:30,301 BAD EPOCHS (no improvement): 4
2022-02-04 13:08:30,301 ----------------------------------------------------------------------------------------------------
2022-02-04 13:08:46,667 epoch 6 - iter 17/177 - loss 2.27743160 - samples/sec: 33.25 - lr: 0.000003
2022-02-04 13:09:04,814 epoch 6 - iter 34/177 - loss 2.27286852 - samples/sec: 29.99 - lr: 0.000003
2022-02-04 13:09:21,239 epoch 6 - iter 51/177 - loss 2.27175336 - samples/sec: 33.23 - lr: 0.000003
2022-02-04 13:09:38,163 epoch 6 - iter 68/177 - loss 2.26491131 - samples/sec: 32.15 - lr: 0.000003
2022-02-04 13:09:54,338 epoch 6 - iter 85/177 - loss 2.25999023 - samples/sec: 33.65 - lr: 0.000003
2022-02-04 13:10:12,270 epoch 6 - iter 102/177 - loss 2.25580949 - samples/sec: 30.38 - lr: 0.000002
2022-02-04 13:10:29,245 epoch 6 - iter 119/177 - loss 2.25275307 - samples/sec: 32.13 - lr: 0.000002
2022-02-04 13:10:46,065 epoch 6 - iter 136/177 - loss 2.24661845 - samples/sec: 32.40 - lr: 0.000002
2022-02-04 13:11:03,357 epoch 6 - iter 153/177 - loss 2.24241040 - samples/sec: 31.47 - lr: 0.000002
2022-02-04 13:11:22,211 epoch 6 - iter 170/177 - loss 2.23773462 - samples/sec: 28.87 - lr: 0.000002
2022-02-04 13:11:28,309 ----------------------------------------------------------------------------------------------------
2022-02-04 13:11:28,321 EPOCH 6 done: loss 2.2366 - lr 0.0000022
2022-02-04 13:11:34,136 DEV : loss 1.9612011909484863 - f1-score (micro avg) 0.884
2022-02-04 13:11:34,150 BAD EPOCHS (no improvement): 4
2022-02-04 13:11:34,151 ----------------------------------------------------------------------------------------------------
2022-02-04 13:11:50,446 epoch 7 - iter 17/177 - loss 2.19566504 - samples/sec: 33.39 - lr: 0.000002
2022-02-04 13:12:06,851 epoch 7 - iter 34/177 - loss 2.19802945 - samples/sec: 33.21 - lr: 0.000002
2022-02-04 13:12:23,401 epoch 7 - iter 51/177 - loss 2.19405535 - samples/sec: 32.88 - lr: 0.000002
2022-02-04 13:12:41,303 epoch 7 - iter 68/177 - loss 2.19162087 - samples/sec: 30.39 - lr: 0.000002
2022-02-04 13:12:58,144 epoch 7 - iter 85/177 - loss 2.18471516 - samples/sec: 32.35 - lr: 0.000002
2022-02-04 13:13:16,467 epoch 7 - iter 102/177 - loss 2.18080579 - samples/sec: 29.75 - lr: 0.000002
2022-02-04 13:13:34,031 epoch 7 - iter 119/177 - loss 2.17936921 - samples/sec: 31.00 - lr: 0.000002
2022-02-04 13:13:51,077 epoch 7 - iter 136/177 - loss 2.17514038 - samples/sec: 32.02 - lr: 0.000002
2022-02-04 13:14:07,857 epoch 7 - iter 153/177 - loss 2.17141812 - samples/sec: 32.48 - lr: 0.000002
2022-02-04 13:14:25,422 epoch 7 - iter 170/177 - loss 2.16711471 - samples/sec: 30.99 - lr: 0.000002
2022-02-04 13:14:31,227 ----------------------------------------------------------------------------------------------------
2022-02-04 13:14:31,228 EPOCH 7 done: loss 2.1662 - lr 0.0000017
2022-02-04 13:14:37,035 DEV : loss 1.8981177806854248 - f1-score (micro avg) 0.9008
2022-02-04 13:14:37,049 BAD EPOCHS (no improvement): 4
2022-02-04 13:14:37,050 ----------------------------------------------------------------------------------------------------
2022-02-04 13:14:54,867 epoch 8 - iter 17/177 - loss 2.13839948 - samples/sec: 30.54 - lr: 0.000002
2022-02-04 13:15:11,283 epoch 8 - iter 34/177 - loss 2.13301605 - samples/sec: 33.16 - lr: 0.000002
2022-02-04 13:15:28,761 epoch 8 - iter 51/177 - loss 2.12335776 - samples/sec: 31.15 - lr: 0.000002
2022-02-04 13:15:44,480 epoch 8 - iter 68/177 - loss 2.12525500 - samples/sec: 34.61 - lr: 0.000001
2022-02-04 13:16:01,084 epoch 8 - iter 85/177 - loss 2.12100353 - samples/sec: 32.77 - lr: 0.000001
2022-02-04 13:16:17,945 epoch 8 - iter 102/177 - loss 2.12081652 - samples/sec: 32.27 - lr: 0.000001
2022-02-04 13:16:34,469 epoch 8 - iter 119/177 - loss 2.11872473 - samples/sec: 32.93 - lr: 0.000001
2022-02-04 13:16:50,308 epoch 8 - iter 136/177 - loss 2.11635062 - samples/sec: 34.35 - lr: 0.000001
2022-02-04 13:17:07,313 epoch 8 - iter 153/177 - loss 2.11371370 - samples/sec: 32.00 - lr: 0.000001
2022-02-04 13:17:25,553 epoch 8 - iter 170/177 - loss 2.11100152 - samples/sec: 29.83 - lr: 0.000001
2022-02-04 13:17:33,472 ----------------------------------------------------------------------------------------------------
2022-02-04 13:17:33,473 EPOCH 8 done: loss 2.1112 - lr 0.0000011
2022-02-04 13:17:39,308 DEV : loss 1.8548760414123535 - f1-score (micro avg) 0.9117
2022-02-04 13:17:39,311 BAD EPOCHS (no improvement): 4
2022-02-04 13:17:39,311 ----------------------------------------------------------------------------------------------------
2022-02-04 13:17:56,622 epoch 9 - iter 17/177 - loss 2.06819398 - samples/sec: 31.43 - lr: 0.000001
2022-02-04 13:18:13,360 epoch 9 - iter 34/177 - loss 2.07590305 - samples/sec: 32.51 - lr: 0.000001
2022-02-04 13:18:31,366 epoch 9 - iter 51/177 - loss 2.07666788 - samples/sec: 30.22 - lr: 0.000001
2022-02-04 13:18:49,983 epoch 9 - iter 68/177 - loss 2.07961625 - samples/sec: 29.23 - lr: 0.000001
2022-02-04 13:19:06,239 epoch 9 - iter 85/177 - loss 2.08063462 - samples/sec: 33.47 - lr: 0.000001
2022-02-04 13:19:23,068 epoch 9 - iter 102/177 - loss 2.08002246 - samples/sec: 32.33 - lr: 0.000001
2022-02-04 13:19:40,188 epoch 9 - iter 119/177 - loss 2.07956869 - samples/sec: 31.78 - lr: 0.000001
2022-02-04 13:19:57,482 epoch 9 - iter 136/177 - loss 2.07835867 - samples/sec: 31.47 - lr: 0.000001
2022-02-04 13:20:14,155 epoch 9 - iter 153/177 - loss 2.07750905 - samples/sec: 32.64 - lr: 0.000001
2022-02-04 13:20:31,533 epoch 9 - iter 170/177 - loss 2.07545212 - samples/sec: 31.31 - lr: 0.000001
2022-02-04 13:20:37,466 ----------------------------------------------------------------------------------------------------
2022-02-04 13:20:37,468 EPOCH 9 done: loss 2.0759 - lr 0.0000006
2022-02-04 13:20:43,299 DEV : loss 1.830302357673645 - f1-score (micro avg) 0.9161
2022-02-04 13:20:43,314 BAD EPOCHS (no improvement): 4
2022-02-04 13:20:43,314 ----------------------------------------------------------------------------------------------------
2022-02-04 13:21:00,247 epoch 10 - iter 17/177 - loss 2.06625894 - samples/sec: 32.13 - lr: 0.000001
2022-02-04 13:21:16,847 epoch 10 - iter 34/177 - loss 2.06850742 - samples/sec: 32.78 - lr: 0.000000
2022-02-04 13:21:34,047 epoch 10 - iter 51/177 - loss 2.06653386 - samples/sec: 31.68 - lr: 0.000000
2022-02-04 13:21:50,597 epoch 10 - iter 68/177 - loss 2.06650174 - samples/sec: 32.88 - lr: 0.000000
2022-02-04 13:22:07,286 epoch 10 - iter 85/177 - loss 2.06409229 - samples/sec: 32.61 - lr: 0.000000
2022-02-04 13:22:25,744 epoch 10 - iter 102/177 - loss 2.06162033 - samples/sec: 29.48 - lr: 0.000000
2022-02-04 13:22:43,419 epoch 10 - iter 119/177 - loss 2.06248176 - samples/sec: 30.78 - lr: 0.000000
2022-02-04 13:22:59,502 epoch 10 - iter 136/177 - loss 2.06392395 - samples/sec: 33.83 - lr: 0.000000
2022-02-04 13:23:16,396 epoch 10 - iter 153/177 - loss 2.06446242 - samples/sec: 32.21 - lr: 0.000000
2022-02-04 13:23:33,136 epoch 10 - iter 170/177 - loss 2.06210437 - samples/sec: 32.50 - lr: 0.000000
2022-02-04 13:23:40,551 ----------------------------------------------------------------------------------------------------
2022-02-04 13:23:40,552 EPOCH 10 done: loss 2.0624 - lr 0.0000000
2022-02-04 13:23:46,365 DEV : loss 1.8217284679412842 - f1-score (micro avg) 0.9195
2022-02-04 13:23:46,367 BAD EPOCHS (no improvement): 4
2022-02-04 13:23:47,542 ----------------------------------------------------------------------------------------------------
2022-02-04 13:23:47,544 Testing using last state of model ...
2022-02-04 13:24:07,461 0.9181 0.9181 0.9181 0.9181
2022-02-04 13:24:07,462
Results:
- F-score (micro) 0.9181
- F-score (macro) 0.439
- Accuracy 0.9181
By class:
              precision    recall  f1-score   support

      NOMcom    0.9530    0.9808    0.9667      2130
      VERcjg    0.9683    0.9935    0.9807      1535
         PRE    0.8411    0.9940    0.9112      1331
      PROper    0.9253    0.9963    0.9595      1368
      PONfbl    0.9824    0.9993    0.9908      1341
      ADVgen    0.8179    0.8276    0.8227       841
      PONfrt    0.9721    1.0000    0.9859       662
      DETdef    0.9393    0.9967    0.9672       606
      ADJqua    0.8289    0.9400    0.8810       500
      VERinf    0.9706    0.9960    0.9831       497
      DETpos    0.9791    0.9979    0.9884       469
      CONcoo    0.9645    0.9935    0.9788       465
      CONsub    0.7437    0.9846    0.8473       389
      VERppe    0.9042    0.9408    0.9221       321
      DETndf    0.7270    0.9959    0.8405       246
      NOMpro    0.9485    0.8340    0.8876       265
      PROrel    0.9398    0.7519    0.8354       270
      ADVneg    0.9577    0.7528    0.8430       271
      DETdem    0.9934    0.9742    0.9837       155
      PROind    1.0000    0.4894    0.6571       188
      PROadv    0.9000    0.8108    0.8531       111
      PROdem    1.0000    0.6387    0.7795       119
      DETind    0.8000    0.7347    0.7660        98
  PRE.DETdef    0.0000    0.0000    0.0000       183
      VERppa    0.0000    0.0000    0.0000        67
      PROimp    0.0000    0.0000    0.0000        54
         INJ    0.0000    0.0000    0.0000        35
      DETcar    0.0000    0.0000    0.0000        31
      ADJind    0.0000    0.0000    0.0000        30
      PROint    0.0000    0.0000    0.0000        22
      ADJcar    0.0000    0.0000    0.0000        21
      PROcar    0.0000    0.0000    0.0000        18
      DETrel    0.0000    0.0000    0.0000        16
      ADJord    0.0000    0.0000    0.0000        16
      PONpga    0.0000    0.0000    0.0000        16
      PROpos    0.0000    0.0000    0.0000        14
      PONpdr    0.0000    0.0000    0.0000        13
      DETint    0.0000    0.0000    0.0000        10
      PONpxx    0.0000    0.0000    0.0000         6
      ADVint    0.0000    0.0000    0.0000         5
  PRE.PROrel    0.0000    0.0000    0.0000         2
       latin    0.0000    0.0000    0.0000         2
      PROord    0.0000    0.0000    0.0000         1
  PRE.PROdem    0.0000    0.0000    0.0000         1
  PRE.NOMcom    0.0000    0.0000    0.0000         1
         ETR    0.0000    0.0000    0.0000         1
      ADVsub    0.0000    0.0000    0.0000         1

   micro avg    0.9181    0.9181    0.9181     14744
   macro avg    0.4480    0.4388    0.4390     14744
weighted avg    0.8876    0.9181    0.8991     14744
 samples avg    0.9181    0.9181    0.9181     14744
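
[Editor's note: the spread between micro F-score (0.9181) and macro F-score (0.4390) follows from the long tail: 24 of the 47 tags, all with support <= 183, are never predicted and score 0.0000, and macro averaging weights every class equally regardless of support, while micro pools all 14744 tokens. A quick check against the table above:]

# Per-class f1 values copied from the report; the remaining 24 classes score 0.0.
nonzero_f1 = [0.9667, 0.9807, 0.9112, 0.9595, 0.9908, 0.8227, 0.9859,
              0.9672, 0.8810, 0.9831, 0.9884, 0.9788, 0.8473, 0.9221,
              0.8405, 0.8876, 0.8354, 0.8430, 0.9837, 0.6571, 0.8531,
              0.7795, 0.7660]
macro_f1 = sum(nonzero_f1 + [0.0] * 24) / 47
print(round(macro_f1, 3))  # 0.439, matching the macro avg row above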
2022-02-04 13:24:07,477 ----------------------------------------------------------------------------------------------------
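
[Editor's note: a minimal usage sketch for the trained tagger, assuming Flair's default final-model.pt filename under the base path logged above (the filename is not shown in this log; the example sentence is an arbitrary Old French line).]

from flair.data import Sentence
from flair.models import SequenceTagger

# Load the final model written at the end of training.
tagger = SequenceTagger.load("resources/taggers/pos-camembert/final-model.pt")

# Tag a sentence and print tokens with their predicted POS labels.
sentence = Sentence("Si firent grant dol tuit si ami .")
tagger.predict(sentence)
print(sentence.to_tagged_string())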