hamedkhaledi commited on
Commit
0ac09fe
1 Parent(s): 53772cd
Files changed (3) hide show
  1. loss.tsv +3 -0
  2. pytorch_model.bin +3 -0
  3. training.log +96 -0
loss.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ 6 07:10:41 0 0.1000 0.07391205054453862 0.05534437298774719 0.8262 0.7999 0.8129 0.6949
3
+ 7 07:59:57 0 0.1000 0.07154660764968938 0.05505584925413132 0.8469 0.7901 0.8175 0.7019
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15da00307b7fec8958dafca847190ec19c2eaf49cfb1a3ac53d503c483e69152
3
+ size 413753973
training.log ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2022-03-25 06:21:37,092 ----------------------------------------------------------------------------------------------------
2
+ 2022-03-25 06:21:37,098 Model: "SequenceTagger(
3
+ (embeddings): StackedEmbeddings(
4
+ (list_embedding_0): WordEmbeddings(
5
+ 'fa'
6
+ (embedding): Embedding(56850, 300)
7
+ )
8
+ (list_embedding_1): FlairEmbeddings(
9
+ (lm): LanguageModel(
10
+ (drop): Dropout(p=0.1, inplace=False)
11
+ (encoder): Embedding(5105, 100)
12
+ (rnn): LSTM(100, 2048)
13
+ (decoder): Linear(in_features=2048, out_features=5105, bias=True)
14
+ )
15
+ )
16
+ (list_embedding_2): FlairEmbeddings(
17
+ (lm): LanguageModel(
18
+ (drop): Dropout(p=0.1, inplace=False)
19
+ (encoder): Embedding(5105, 100)
20
+ (rnn): LSTM(100, 2048)
21
+ (decoder): Linear(in_features=2048, out_features=5105, bias=True)
22
+ )
23
+ )
24
+ )
25
+ (word_dropout): WordDropout(p=0.05)
26
+ (locked_dropout): LockedDropout(p=0.5)
27
+ (embedding2nn): Linear(in_features=4396, out_features=4396, bias=True)
28
+ (rnn): LSTM(4396, 256, batch_first=True, bidirectional=True)
29
+ (linear): Linear(in_features=512, out_features=18, bias=True)
30
+ (beta): 1.0
31
+ (weights): None
32
+ (weight_tensor) None
33
+ )"
34
+ 2022-03-25 06:21:37,103 ----------------------------------------------------------------------------------------------------
35
+ 2022-03-25 06:21:37,108 Corpus: "Corpus: 23060 train + 4070 dev + 4150 test sentences"
36
+ 2022-03-25 06:21:37,111 ----------------------------------------------------------------------------------------------------
37
+ 2022-03-25 06:21:37,115 Parameters:
38
+ 2022-03-25 06:21:37,117 - learning_rate: "0.1"
39
+ 2022-03-25 06:21:37,119 - mini_batch_size: "4"
40
+ 2022-03-25 06:21:37,122 - patience: "3"
41
+ 2022-03-25 06:21:37,125 - anneal_factor: "0.5"
42
+ 2022-03-25 06:21:37,127 - max_epochs: "10"
43
+ 2022-03-25 06:21:37,129 - shuffle: "True"
44
+ 2022-03-25 06:21:37,132 - train_with_dev: "False"
45
+ 2022-03-25 06:21:37,135 - batch_growth_annealing: "False"
46
+ 2022-03-25 06:21:37,137 ----------------------------------------------------------------------------------------------------
47
+ 2022-03-25 06:21:37,140 Model training base path: "/content/gdrive/MyDrive/project/data/ner/model"
48
+ 2022-03-25 06:21:37,142 ----------------------------------------------------------------------------------------------------
49
+ 2022-03-25 06:21:37,145 Device: cuda:0
50
+ 2022-03-25 06:21:37,148 ----------------------------------------------------------------------------------------------------
51
+ 2022-03-25 06:21:37,150 Embeddings storage mode: none
52
+ 2022-03-25 06:21:37,398 ----------------------------------------------------------------------------------------------------
53
+ 2022-03-25 06:25:43,993 epoch 6 - iter 576/5765 - loss 0.07042695 - samples/sec: 9.35 - lr: 0.100000
54
+ 2022-03-25 06:29:47,830 epoch 6 - iter 1152/5765 - loss 0.07287426 - samples/sec: 9.49 - lr: 0.100000
55
+ 2022-03-25 06:34:02,575 epoch 6 - iter 1728/5765 - loss 0.07379352 - samples/sec: 9.08 - lr: 0.100000
56
+ 2022-03-25 06:38:22,556 epoch 6 - iter 2304/5765 - loss 0.07346159 - samples/sec: 8.90 - lr: 0.100000
57
+ 2022-03-25 06:42:37,312 epoch 6 - iter 2880/5765 - loss 0.07318457 - samples/sec: 9.08 - lr: 0.100000
58
+ 2022-03-25 06:47:03,459 epoch 6 - iter 3456/5765 - loss 0.07343553 - samples/sec: 8.69 - lr: 0.100000
59
+ 2022-03-25 06:51:22,020 epoch 6 - iter 4032/5765 - loss 0.07360594 - samples/sec: 8.95 - lr: 0.100000
60
+ 2022-03-25 06:55:36,718 epoch 6 - iter 4608/5765 - loss 0.07332146 - samples/sec: 9.08 - lr: 0.100000
61
+ 2022-03-25 07:00:02,036 epoch 6 - iter 5184/5765 - loss 0.07376939 - samples/sec: 8.72 - lr: 0.100000
62
+ 2022-03-25 07:04:32,247 epoch 6 - iter 5760/5765 - loss 0.07393306 - samples/sec: 8.56 - lr: 0.100000
63
+ 2022-03-25 07:04:35,408 ----------------------------------------------------------------------------------------------------
64
+ 2022-03-25 07:04:35,411 EPOCH 6 done: loss 0.0739 - lr 0.1000000
65
+ 2022-03-25 07:10:41,676 DEV : loss 0.05534437298774719 - f1-score (micro avg) 0.8129
66
+ 2022-03-25 07:10:41,758 BAD EPOCHS (no improvement): 0
67
+ 2022-03-25 07:10:43,386 saving best model
68
+ 2022-03-25 07:10:45,085 ----------------------------------------------------------------------------------------------------
69
+ 2022-03-25 07:15:08,362 epoch 7 - iter 576/5765 - loss 0.06846625 - samples/sec: 8.75 - lr: 0.100000
70
+ 2022-03-25 07:19:20,901 epoch 7 - iter 1152/5765 - loss 0.07066517 - samples/sec: 9.16 - lr: 0.100000
71
+ 2022-03-25 07:23:45,054 epoch 7 - iter 1728/5765 - loss 0.07063719 - samples/sec: 8.76 - lr: 0.100000
72
+ 2022-03-25 07:27:58,256 epoch 7 - iter 2304/5765 - loss 0.07101257 - samples/sec: 9.14 - lr: 0.100000
73
+ 2022-03-25 07:32:05,224 epoch 7 - iter 2880/5765 - loss 0.07072532 - samples/sec: 9.37 - lr: 0.100000
74
+ 2022-03-25 07:36:19,489 epoch 7 - iter 3456/5765 - loss 0.07040446 - samples/sec: 9.10 - lr: 0.100000
75
+ 2022-03-25 07:40:49,459 epoch 7 - iter 4032/5765 - loss 0.07117669 - samples/sec: 8.57 - lr: 0.100000
76
+ 2022-03-25 07:45:06,879 epoch 7 - iter 4608/5765 - loss 0.07147140 - samples/sec: 8.99 - lr: 0.100000
77
+ 2022-03-25 07:49:20,561 epoch 7 - iter 5184/5765 - loss 0.07151126 - samples/sec: 9.12 - lr: 0.100000
78
+ 2022-03-25 07:53:46,941 epoch 7 - iter 5760/5765 - loss 0.07156780 - samples/sec: 8.69 - lr: 0.100000
79
+ 2022-03-25 07:53:49,751 ----------------------------------------------------------------------------------------------------
80
+ 2022-03-25 07:53:49,759 EPOCH 7 done: loss 0.0715 - lr 0.1000000
81
+ 2022-03-25 07:59:57,729 DEV : loss 0.05505584925413132 - f1-score (micro avg) 0.8175
82
+ 2022-03-25 07:59:57,813 BAD EPOCHS (no improvement): 0
83
+ 2022-03-25 07:59:59,910 saving best model
84
+ 2022-03-25 08:00:01,383 ----------------------------------------------------------------------------------------------------
85
+ 2022-03-25 08:04:20,017 epoch 8 - iter 576/5765 - loss 0.06595992 - samples/sec: 8.91 - lr: 0.100000
86
+ 2022-03-25 08:08:34,362 epoch 8 - iter 1152/5765 - loss 0.06695618 - samples/sec: 9.10 - lr: 0.100000
87
+ 2022-03-25 08:13:01,311 epoch 8 - iter 1728/5765 - loss 0.06868385 - samples/sec: 8.66 - lr: 0.100000
88
+ 2022-03-25 08:17:19,699 epoch 8 - iter 2304/5765 - loss 0.06855573 - samples/sec: 8.95 - lr: 0.100000
89
+ 2022-03-25 08:21:39,417 epoch 8 - iter 2880/5765 - loss 0.06828534 - samples/sec: 8.91 - lr: 0.100000
90
+ 2022-03-25 08:25:58,656 epoch 8 - iter 3456/5765 - loss 0.06920992 - samples/sec: 8.92 - lr: 0.100000
91
+ 2022-03-25 08:30:19,059 epoch 8 - iter 4032/5765 - loss 0.06966214 - samples/sec: 8.88 - lr: 0.100000
92
+ 2022-03-25 08:34:32,114 epoch 8 - iter 4608/5765 - loss 0.06999527 - samples/sec: 9.14 - lr: 0.100000
93
+ 2022-03-25 08:38:45,063 epoch 8 - iter 5184/5765 - loss 0.07041313 - samples/sec: 9.15 - lr: 0.100000
94
+ 2022-03-25 08:42:53,891 epoch 8 - iter 5760/5765 - loss 0.07067043 - samples/sec: 9.30 - lr: 0.100000
95
+ 2022-03-25 08:42:56,995 ----------------------------------------------------------------------------------------------------
96
+ 2022-03-25 08:42:56,998 EPOCH 8 done: loss 0.0707 - lr 0.1000000