from sumy.parsers.plaintext import PlaintextParser
from sumy.parsers.html import HtmlParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

import gradio as gr

import nltk
nltk.download('punkt')  # sentence tokenizer models used by sumy's Tokenizer


def summarize(method, language, sentence_count, input_type, input_):
  # Pick the sumy summarizer class matching the selected method.
  if method == 'LSA':
    from sumy.summarizers.lsa import LsaSummarizer as Summarizer
  elif method == 'text-rank':
    from sumy.summarizers.text_rank import TextRankSummarizer as Summarizer
  elif method == 'lex-rank':
    from sumy.summarizers.lex_rank import LexRankSummarizer as Summarizer
  elif method == 'edmundson':
    from sumy.summarizers.edmundson import EdmundsonSummarizer as Summarizer
  elif method == 'luhn':
    from sumy.summarizers.luhn import LuhnSummarizer as Summarizer
  elif method == 'kl-sum':
    from sumy.summarizers.kl import KLSummarizer as Summarizer
  elif method == 'random':
    from sumy.summarizers.random import RandomSummarizer as Summarizer
  elif method == 'reduction':
    from sumy.summarizers.reduction import ReductionSummarizer as Summarizer
  else:
    raise ValueError(f"Unknown summarization method: {method}")

  # Parse the input: fetch and parse an HTML page when a URL is given,
  # otherwise tokenize the pasted plain text directly.
  if input_type == "URL":
    parser = HtmlParser.from_url(input_, Tokenizer(language))
  elif input_type == "text":
    parser = PlaintextParser.from_string(input_, Tokenizer(language))
  else:
    raise ValueError(f"Unknown input type: {input_type}")

  stemmer = Stemmer(language)
  summarizer = Summarizer(stemmer)
  stop_words = get_stop_words(language)

  if method == 'edmundson':
    # EdmundsonSummarizer expects explicit bonus/stigma/null word lists
    # rather than a plain stop-word list.
    summarizer.null_words = stop_words
    summarizer.bonus_words = parser.significant_words
    summarizer.stigma_words = parser.stigma_words
  else:
    summarizer.stop_words = stop_words

  # gr.Number yields a float, so cast before asking sumy for that many sentences.
  summary_sentences = summarizer(parser.document, int(sentence_count))
  summary = ' '.join(str(sentence) for sentence in summary_sentences)

  return summary

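# Hypothetical quick check (not part of the Gradio app): summarize() can also be
# called directly, e.g. to get a two-sentence Luhn summary of pasted text.
#
#   sample_text = (
#       "Automatic summarization is the process of shortening text computationally. "
#       "Extractive methods select a subset of the existing sentences. "
#       "Abstractive methods generate new sentences that convey the key points."
#   )
#   print(summarize("luhn", "english", 2, "text", sample_text))
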
title = "sumy library space for automatic text summarization"

description = """
This is a space for [sumy](https://github.com/miso-belica/sumy), an automatic text summarization library.
The summary can be extracted either from an HTML page (given its URL) or from plain text. You can find a list
of the available summarization methods [here](https://github.com/miso-belica/sumy/blob/main/docs/summarizators.md).
"""

methods = ["LSA", "luhn", "edmundson", "text-rank", "lex-rank", "random", "reduction", "kl-sum"]

supported_languages = ["english", "french", "chinese", "czech", "german", "italian", "hebrew", 
                        "japanese", "portuguese", "slovak", "spanish", "ukrainian", "greek"]

iface = gr.Interface(
    fn=summarize,
    inputs=[
        gr.Dropdown(methods, label="Summarization method"),
        gr.Dropdown(supported_languages, label="Language"),
        gr.Number(value=5, label="Sentence count"),
        gr.Radio(["URL", "text"], value="URL", label="Input type"),
        gr.Textbox(lines=5, label="Input (URL or plain text)"),
    ],
    outputs="text",
    title=title,
    description=description,
    examples=[
        ["luhn", "english", 2, "URL", "https://en.wikipedia.org/wiki/Automatic_summarization"]
    ],
)

iface.launch()