Spaces:
Running
Running
DeDeckerThomas
committed on
Commit
•
860c19c
1
Parent(s):
24f3280
Fix empty keyphrase bug
Browse files
extraction/keyphrase_extraction_pipeline.py
DELETED
@@ -1,24 +0,0 @@
|
|
1 |
-
from transformers import (
|
2 |
-
TokenClassificationPipeline,
|
3 |
-
AutoModelForTokenClassification,
|
4 |
-
AutoTokenizer,
|
5 |
-
)
|
6 |
-
from transformers.pipelines import AggregationStrategy
|
7 |
-
import numpy as np
|
8 |
-
|
9 |
-
|
10 |
-
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
|
11 |
-
def __init__(self, model, *args, **kwargs):
|
12 |
-
super().__init__(
|
13 |
-
model=AutoModelForTokenClassification.from_pretrained(model),
|
14 |
-
tokenizer=AutoTokenizer.from_pretrained(model),
|
15 |
-
*args,
|
16 |
-
**kwargs
|
17 |
-
)
|
18 |
-
|
19 |
-
def postprocess(self, model_outputs):
|
20 |
-
results = super().postprocess(
|
21 |
-
model_outputs=model_outputs,
|
22 |
-
aggregation_strategy=AggregationStrategy.SIMPLE,
|
23 |
-
)
|
24 |
-
return np.unique([result.get("word").strip() for result in results])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
extraction/keyphrase_generation_pipeline.py
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
from transformers import (
|
2 |
-
Text2TextGenerationPipeline,
|
3 |
-
AutoModelForSeq2SeqLM,
|
4 |
-
AutoTokenizer,
|
5 |
-
)
|
6 |
-
|
7 |
-
|
8 |
-
class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
|
9 |
-
def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
|
10 |
-
super().__init__(
|
11 |
-
model=AutoModelForSeq2SeqLM.from_pretrained(model),
|
12 |
-
tokenizer=AutoTokenizer.from_pretrained(model),
|
13 |
-
*args,
|
14 |
-
**kwargs
|
15 |
-
)
|
16 |
-
self.keyphrase_sep_token = keyphrase_sep_token
|
17 |
-
|
18 |
-
def postprocess(self, model_outputs):
|
19 |
-
results = super().postprocess(model_outputs=model_outputs)
|
20 |
-
return [
|
21 |
-
[
|
22 |
-
keyphrase.strip()
|
23 |
-
for keyphrase in result.get("generated_text").split(
|
24 |
-
self.keyphrase_sep_token
|
25 |
-
)
|
26 |
-
]
|
27 |
-
for result in results
|
28 |
-
][0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc
CHANGED
Binary files a/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc differ
|
|
pipelines/keyphrase_generation_pipeline.py
CHANGED
@@ -24,6 +24,7 @@ class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
|
|
24 |
for keyphrase in result.get("generated_text").split(
|
25 |
self.keyphrase_sep_token
|
26 |
)
|
|
|
27 |
]
|
28 |
for result in results
|
29 |
][0]
|
|
|
24 |
for keyphrase in result.get("generated_text").split(
|
25 |
self.keyphrase_sep_token
|
26 |
)
|
27 |
+
if keyphrase != ""
|
28 |
]
|
29 |
for result in results
|
30 |
][0]
|