File size: 2,702 Bytes
664c81e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
import torch
import os

from pipeline import KeywordExtractorPipeline

# Absolute directory that contains this script (symlinks resolved by
# os.path.realpath); used below to build the pretrained-model file paths.
DIR_PATH = os.path.dirname(os.path.realpath(__file__))


def extract_keyword(title, text, top_n, ngram_low_range, ngram_high_range, min_freq, diversify_result):
    """Run the keyword pipeline on one document and format the result as text.

    Args:
        title: Document title; sent to the pipeline under the ``"title"`` key.
        text: Document body; sent to the pipeline under the ``"text"`` key.
        top_n: Number of keywords to request (forwarded as ``top_n``).
        ngram_low_range: Lower n-gram bound (first element of ``ngram_n``).
        ngram_high_range: Upper n-gram bound (second element of ``ngram_n``).
        min_freq: Minimum n-gram frequency, forwarded unchanged as ``min_freq``.
        diversify_result: Flag forwarded unchanged as ``diversify_result``.

    Returns:
        A string with one ``"<keyword>: <score>"`` line per pair yielded by
        the pipeline, each terminated by a newline. Empty string when the
        pipeline yields nothing.
    """

    def _as_int(value):
        # The UI feeds these from gr.Number widgets, which can deliver a
        # float (e.g. 2.0 or 2.5) or None when the field is cleared. Counts
        # and n-gram sizes are integral, so normalise them here; None is
        # passed through untouched so the pipeline keeps deciding what a
        # missing value means.
        return value if value is None else int(value)

    inp = {"text": text, "title": title}
    keyword_ls = kw_pipeline(
        inputs=inp,
        # NOTE(review): min_freq is deliberately not coerced - whether the
        # pipeline reads it as an absolute count or a ratio is not visible
        # from this file.
        min_freq=min_freq,
        ngram_n=(_as_int(ngram_low_range), _as_int(ngram_high_range)),
        top_n=_as_int(top_n),
        diversify_result=diversify_result,
    )
    # Build the output in one pass instead of repeated string concatenation.
    return ''.join(f'{kw}: {score}\n' for kw, score in keyword_ls)


# Model loading is wrapped in `if gr.NO_RELOAD:`; per Gradio's reload-mode
# documentation, code in such a block is not re-evaluated when the script is
# re-run by the auto-reloader, so the models are loaded only once.
if gr.NO_RELOAD:
    print("Loading PhoBERT model")
    # NOTE(review): torch.load unpickles the file, so these checkpoints must
    # come from a trusted source. Presumably each file stores a whole model
    # object (``.eval()`` is called directly on the result) - confirm.
    phobert = torch.load(f'{DIR_PATH}/pretrained-models/phobert.pt')
    phobert.eval()

    print("Loading NER model")
    ner_model = torch.load(f'{DIR_PATH}/pretrained-models/ner-vietnamese-electra-base.pt')
    ner_model.eval()
    # Module-level pipeline object; extract_keyword() calls it by this name.
    kw_pipeline = KeywordExtractorPipeline(phobert, ner_model)

if __name__ == "__main__":
    # Input widgets, created in the same order as extract_keyword's parameters.
    title_input = gr.Text(label="Title", lines=1, value="Enter title here")
    text_input = gr.Textbox(label="Text", lines=5, value="Enter text here")
    top_n_input = gr.Number(
        label="Top N keywords",
        info="Number of keywords retrieved",
        value=10,
    )
    ngram_low_input = gr.Number(label="Ngram low range", value=1)
    ngram_high_input = gr.Number(label="Ngram high range", value=3)
    min_freq_input = gr.Number(label="Ngram minimum frequency", value=1)
    diversify_input = gr.Checkbox(label="Diversify result")

    # Single text area that shows the formatted "keyword: score" lines.
    keywords_output = gr.Textbox(label="Keywords Extracted")

    demo = gr.Interface(
        fn=extract_keyword,
        inputs=[
            title_input,
            text_input,
            top_n_input,
            ngram_low_input,
            ngram_high_input,
            min_freq_input,
            diversify_input,
        ],
        outputs=keywords_output,
    )

    # share=True asks Gradio to make the demo shareable beyond this machine.
    demo.launch(share=True)