File size: 2,702 Bytes
664c81e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import gradio as gr
import torch
import os
from pipeline import KeywordExtractorPipeline
# Directory that contains this script (symlinks resolved); model paths are built from it.
DIR_PATH = os.path.split(os.path.realpath(__file__))[0]
def extract_keyword(title, text, top_n, ngram_low_range, ngram_high_range, min_freq, diversify_result):
    """Run the keyword pipeline on one document and format the result as text.

    Args:
        title: Document title, sent to the pipeline under the ``"title"`` key.
        text: Document body, sent to the pipeline under the ``"text"`` key.
        top_n: Number of keywords to retrieve.
        ngram_low_range: Lower bound of the n-gram range.
        ngram_high_range: Upper bound of the n-gram range.
        min_freq: Minimum n-gram frequency, forwarded as ``min_freq``.
        diversify_result: Flag forwarded unchanged as ``diversify_result``.

    Returns:
        One ``"<keyword>: <score>"`` line per pair yielded by the pipeline,
        each terminated by a newline; the empty string if nothing is yielded.
    """
    # gr.Number hands over floats (e.g. 10.0) unless it is configured with
    # precision=0, so coerce the count-like arguments to int before use.
    top_n = int(top_n)
    ngram_n = (int(ngram_low_range), int(ngram_high_range))
    min_freq = int(min_freq)

    inp = {"text": text, "title": title}
    keyword_ls = kw_pipeline(
        inputs=inp,
        min_freq=min_freq,
        ngram_n=ngram_n,
        top_n=top_n,
        diversify_result=diversify_result,
    )
    return ''.join(f'{kw}: {score}\n' for kw, score in keyword_ls)
def _load_model(filename):
    """Load a pickled model from ``pretrained-models/`` and put it in eval mode."""
    # A checkpoint saved on a GPU cannot be deserialized on a CPU-only host
    # unless its storages are remapped; when CUDA is available the original
    # placement is left untouched.
    map_location = None if torch.cuda.is_available() else "cpu"
    # NOTE(security): the loaded object is used directly as a model (.eval()
    # is called on it), so this is a full unpickle -- only load trusted files.
    # torch >= 2.6 defaults to weights_only=True, which rejects such files,
    # hence the explicit weights_only=False.
    model = torch.load(
        f'{DIR_PATH}/pretrained-models/{filename}',
        map_location=map_location,
        weights_only=False,
    )
    model.eval()
    return model


# Guarded by gr.NO_RELOAD so Gradio's reload mode does not repeat the model loading.
if gr.NO_RELOAD:
    print("Loading PhoBERT model")
    phobert = _load_model('phobert.pt')
    print("Loading NER model")
    ner_model = _load_model('ner-vietnamese-electra-base.pt')
    kw_pipeline = KeywordExtractorPipeline(phobert, ner_model)
if __name__ == "__main__":
    # The inputs are listed in the same order as extract_keyword's positional
    # parameters. precision=0 makes each gr.Number deliver an int rather than
    # a float for the count-like settings.
    # NOTE(review): the two text fields are pre-filled through `value`, so the
    # hint text is submitted as real input unless the user replaces it.
    demo = gr.Interface(
        fn=extract_keyword,
        inputs=[
            gr.Text(
                label="Title",
                lines=1,
                value="Enter title here",
            ),
            gr.Textbox(
                label="Text",
                lines=5,
                value="Enter text here",
            ),
            gr.Number(
                label="Top N keywords",
                info="Number of keywords retrieved",
                value=10,
                precision=0,
            ),
            gr.Number(
                label="Ngram low range",
                value=1,
                precision=0,
            ),
            gr.Number(
                label="Ngram high range",
                value=3,
                precision=0,
            ),
            gr.Number(
                label="Ngram minimum frequency",
                value=1,
                precision=0,
            ),
            gr.Checkbox(
                label="Diversify result",
            ),
        ],
        outputs=gr.Textbox(
            label="Keywords Extracted",
        ),
    )
    # share=True additionally requests a public share link for the demo.
    demo.launch(share=True)
|