DeDeckerThomas committed on
Commit
860c19c
1 Parent(s): 24f3280

Fix empty keyphrase bug

Browse files
extraction/keyphrase_extraction_pipeline.py DELETED
@@ -1,24 +0,0 @@
1
- from transformers import (
2
- TokenClassificationPipeline,
3
- AutoModelForTokenClassification,
4
- AutoTokenizer,
5
- )
6
- from transformers.pipelines import AggregationStrategy
7
- import numpy as np
8
-
9
-
10
- class KeyphraseExtractionPipeline(TokenClassificationPipeline):
11
- def __init__(self, model, *args, **kwargs):
12
- super().__init__(
13
- model=AutoModelForTokenClassification.from_pretrained(model),
14
- tokenizer=AutoTokenizer.from_pretrained(model),
15
- *args,
16
- **kwargs
17
- )
18
-
19
- def postprocess(self, model_outputs):
20
- results = super().postprocess(
21
- model_outputs=model_outputs,
22
- aggregation_strategy=AggregationStrategy.SIMPLE,
23
- )
24
- return np.unique([result.get("word").strip() for result in results])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
extraction/keyphrase_generation_pipeline.py DELETED
@@ -1,28 +0,0 @@
1
- from transformers import (
2
- Text2TextGenerationPipeline,
3
- AutoModelForSeq2SeqLM,
4
- AutoTokenizer,
5
- )
6
-
7
-
8
- class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
9
- def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
10
- super().__init__(
11
- model=AutoModelForSeq2SeqLM.from_pretrained(model),
12
- tokenizer=AutoTokenizer.from_pretrained(model),
13
- *args,
14
- **kwargs
15
- )
16
- self.keyphrase_sep_token = keyphrase_sep_token
17
-
18
- def postprocess(self, model_outputs):
19
- results = super().postprocess(model_outputs=model_outputs)
20
- return [
21
- [
22
- keyphrase.strip()
23
- for keyphrase in result.get("generated_text").split(
24
- self.keyphrase_sep_token
25
- )
26
- ]
27
- for result in results
28
- ][0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc CHANGED
Binary files a/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc differ
 
pipelines/keyphrase_generation_pipeline.py CHANGED
@@ -24,6 +24,7 @@ class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
24
  for keyphrase in result.get("generated_text").split(
25
  self.keyphrase_sep_token
26
  )
 
27
  ]
28
  for result in results
29
  ][0]
 
24
  for keyphrase in result.get("generated_text").split(
25
  self.keyphrase_sep_token
26
  )
27
+ if keyphrase != ""
28
  ]
29
  for result in results
30
  ][0]