arubenruben
committed on
Commit
•
02dffa0
1
Parent(s):
eb14d01
Update deploy_pipeline.py
Browse files
- deploy_pipeline.py +8 -13
deploy_pipeline.py
CHANGED
@@ -14,14 +14,13 @@ class TokenizeAndAlignLabelsStep():
|
|
14 |
# Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
|
15 |
def tokenize_and_align_labels(self, examples, tokenizer):
|
16 |
|
17 |
-
tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128)
|
18 |
|
19 |
# Map tokens to their respective word.
|
20 |
word_ids = tokenized_inputs.word_ids()
|
21 |
|
22 |
previous_word_idx = None
|
23 |
-
|
24 |
-
tokens= []
|
25 |
labels_mask = []
|
26 |
|
27 |
for word_idx in word_ids: # Set the special tokens to -100.
|
@@ -29,20 +28,17 @@ class TokenizeAndAlignLabelsStep():
|
|
29 |
labels_mask.append(False)
|
30 |
# Only label the first token of a given word.
|
31 |
elif word_idx != previous_word_idx:
|
32 |
-
labels_mask.append(True)
|
33 |
-
tokens.append(tokenized_inputs["input_ids"][word_idx])
|
34 |
else:
|
35 |
labels_mask.append(False)
|
36 |
|
37 |
previous_word_idx = word_idx
|
38 |
|
39 |
-
tokenized_inputs["tokens"] = tokenizer.decode(tokens, skip_special_tokens=True)
|
40 |
tokenized_inputs["labels_mask"] = labels_mask
|
41 |
|
42 |
return tokenized_inputs
|
43 |
|
44 |
|
45 |
-
|
46 |
class BERT_CRF_Pipeline(Pipeline):
|
47 |
|
48 |
def _sanitize_parameters(self, **kwargs):
|
@@ -76,18 +72,17 @@ class BERT_CRF_Pipeline(Pipeline):
|
|
76 |
outputs = self.model(input_ids=input_ids, token_type_ids=token_type_ids,
|
77 |
attention_mask=attention_mask, labels=None, labels_mask=labels_mask)
|
78 |
|
79 |
-
return
|
80 |
|
81 |
-
def postprocess(self,
|
82 |
|
83 |
-
model_outputs = outputs['outputs']
|
84 |
-
tokens = outputs['tokens']
|
85 |
-
|
86 |
# From Ner_tags to Ner_labels
|
87 |
for i, label in enumerate(model_outputs[0]):
|
88 |
model_outputs[0][i] = self.model.config.id2label[label]
|
89 |
|
90 |
-
return model_outputs[0]
|
|
|
|
|
91 |
|
92 |
|
93 |
def main():
|
|
|
14 |
# Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
|
15 |
def tokenize_and_align_labels(self, examples, tokenizer):
|
16 |
|
17 |
+
tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128, is_split_into_words=True)
|
18 |
|
19 |
# Map tokens to their respective word.
|
20 |
word_ids = tokenized_inputs.word_ids()
|
21 |
|
22 |
previous_word_idx = None
|
23 |
+
|
|
|
24 |
labels_mask = []
|
25 |
|
26 |
for word_idx in word_ids: # Set the special tokens to -100.
|
|
|
28 |
labels_mask.append(False)
|
29 |
# Only label the first token of a given word.
|
30 |
elif word_idx != previous_word_idx:
|
31 |
+
labels_mask.append(True)
|
|
|
32 |
else:
|
33 |
labels_mask.append(False)
|
34 |
|
35 |
previous_word_idx = word_idx
|
36 |
|
|
|
37 |
tokenized_inputs["labels_mask"] = labels_mask
|
38 |
|
39 |
return tokenized_inputs
|
40 |
|
41 |
|
|
|
42 |
class BERT_CRF_Pipeline(Pipeline):
|
43 |
|
44 |
def _sanitize_parameters(self, **kwargs):
|
|
|
72 |
outputs = self.model(input_ids=input_ids, token_type_ids=token_type_ids,
|
73 |
attention_mask=attention_mask, labels=None, labels_mask=labels_mask)
|
74 |
|
75 |
+
return outputs
|
76 |
|
77 |
+
def postprocess(self, model_outputs):
|
78 |
|
|
|
|
|
|
|
79 |
# From Ner_tags to Ner_labels
|
80 |
for i, label in enumerate(model_outputs[0]):
|
81 |
model_outputs[0][i] = self.model.config.id2label[label]
|
82 |
|
83 |
+
return model_outputs[0]
|
84 |
+
|
85 |
+
|
86 |
|
87 |
|
88 |
def main():
|