initial model commit
Browse files
README.md
CHANGED
@@ -79,7 +79,7 @@ from flair.models import SequenceTagger
|
|
79 |
tagger = SequenceTagger.load("flair/pos-english")
|
80 |
|
81 |
# make example sentence
|
82 |
-
sentence = Sentence("I love Berlin")
|
83 |
|
84 |
# predict POS tags
|
85 |
tagger.predict(sentence)
|
@@ -97,14 +97,14 @@ for entity in sentence.get_spans('pos'):
|
|
97 |
|
98 |
This yields the following output:
|
99 |
```
|
100 |
-
Span [1
|
101 |
-
Span [
|
102 |
-
Span [
|
103 |
-
Span [
|
104 |
|
105 |
```
|
106 |
|
107 |
-
So, the
|
108 |
|
109 |
|
110 |
---
|
@@ -115,14 +115,18 @@ The following Flair script was used to train this model:
|
|
115 |
|
116 |
```python
|
117 |
from flair.data import Corpus
|
118 |
-
from flair.datasets import
|
119 |
from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
|
120 |
|
121 |
-
# 1.
|
122 |
-
corpus: Corpus =
|
|
|
|
|
|
|
|
|
123 |
|
124 |
# 2. what tag do we want to predict?
|
125 |
-
tag_type = '
|
126 |
|
127 |
# 3. make the tag dictionary from the corpus
|
128 |
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
|
@@ -154,7 +158,7 @@ from flair.trainers import ModelTrainer
|
|
154 |
trainer = ModelTrainer(tagger, corpus)
|
155 |
|
156 |
# 7. run training
|
157 |
-
trainer.train('resources/taggers/
|
158 |
train_with_dev=True,
|
159 |
max_epochs=150)
|
160 |
```
|
|
|
79 |
tagger = SequenceTagger.load("flair/pos-english")
|
80 |
|
81 |
# make example sentence
|
82 |
+
sentence = Sentence("I love Berlin.")
|
83 |
|
84 |
# predict POS tags
|
85 |
tagger.predict(sentence)
|
|
|
97 |
|
98 |
This yields the following output:
|
99 |
```
|
100 |
+
Span [1]: "I" [− Labels: PRP (1.0)]
|
101 |
+
Span [2]: "love" [− Labels: VBP (1.0)]
|
102 |
+
Span [3]: "Berlin" [− Labels: NNP (0.9999)]
|
103 |
+
Span [4]: "." [− Labels: . (1.0)]
|
104 |
|
105 |
```
|
106 |
|
107 |
+
So, the word "*I*" is labeled as a **pronoun** (PRP), "*love*" is labeled as a **verb** (VBP) and "*Berlin*" is labeled as a **proper noun** (NNP) in the sentence "*I love Berlin*".
|
108 |
|
109 |
|
110 |
---
|
|
|
115 |
|
116 |
```python
|
117 |
from flair.data import Corpus
|
118 |
+
from flair.datasets import ColumnCorpus
|
119 |
from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings
|
120 |
|
121 |
+
# 1. load the corpus (OntoNotes does not ship with Flair; you need to download it and reformat it into a column format yourself)
|
122 |
+
corpus: Corpus = ColumnCorpus(
|
123 |
+
"resources/tasks/onto-ner",
|
124 |
+
column_format={0: "text", 1: "pos", 2: "upos", 3: "ner"},
|
125 |
+
tag_to_bioes="ner",
|
126 |
+
)
|
127 |
|
128 |
# 2. what tag do we want to predict?
|
129 |
+
tag_type = 'pos'
|
130 |
|
131 |
# 3. make the tag dictionary from the corpus
|
132 |
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
|
|
|
158 |
trainer = ModelTrainer(tagger, corpus)
|
159 |
|
160 |
# 7. run training
|
161 |
+
trainer.train('resources/taggers/pos-english',
|
162 |
train_with_dev=True,
|
163 |
max_epochs=150)
|
164 |
```
|