azsalihu committed on
Commit
6720d31
1 Parent(s): a49a196

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import PyPDF2
3
+ import gradio as gr
4
+ from IPython.display import Audio, display
5
+ from transformers import pipeline
6
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
+ import numpy as np
8
+ import scipy
9
+ from gtts import gTTS
10
+ from io import BytesIO
11
+
12
def extract_text(article):
    """Return the text of the first page of a PDF.

    Args:
        article: The PDF source, handed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        Whatever ``extract_text()`` yields for the page at index 0.
    """
    # Only the first page is read; any further pages are never touched.
    return PyPDF2.PdfReader(article).pages[0].extract_text()
16
+
17
# Hugging Face model id used for both the tokenizer and the seq2seq model.
_SUMMARY_MODEL_NAME = "pszemraj/led-base-book-summary"

# Number of sentences taken after the one that mentions "Abstract".
_ABSTRACT_SENTENCE_COUNT = 7

# Holds the loaded (tokenizer, model) pair so it is only loaded once per process.
_summarizer_cache = {}


def _load_summarizer():
    """Return the (tokenizer, model) pair, loading it on first use only."""
    if "pair" not in _summarizer_cache:
        _summarizer_cache["pair"] = (
            AutoTokenizer.from_pretrained(_SUMMARY_MODEL_NAME),
            AutoModelForSeq2SeqLM.from_pretrained(_SUMMARY_MODEL_NAME),
        )
    return _summarizer_cache["pair"]


def _select_abstract(text):
    """Pick the sentences that follow the first one mentioning "Abstract"."""
    sentences = text.split(". ")
    # Start right after the first sentence containing the heading. When no
    # sentence mentions "Abstract", fall back to the beginning of the text
    # instead of failing on an unbound variable.
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if "Abstract" in sentence),
        0,
    )
    return ". ".join(sentences[start:start + _ABSTRACT_SENTENCE_COUNT])


def summarize_abstract(text):
    """Summarize the abstract found in *text*.

    The text is split on ". "; the sentences following the first one that
    contains "Abstract" are joined and fed to the summarization model. If no
    sentence contains "Abstract", the first sentences of *text* are used.

    Args:
        text: Plain text of the document (e.g. the first page of a paper).

    Returns:
        The generated summary, cut off after its last "." when it contains
        one so that it does not end mid-sentence.
    """
    abstract = _select_abstract(text)
    tokenizer, model = _load_summarizer()

    # Tokenize abstract
    inputs = tokenizer(abstract, max_length=1024, return_tensors="pt", truncation=True)

    # Generate summary. The attention mask produced by the tokenizer is
    # passed along explicitly rather than left for generate() to infer.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=50,
        min_length=30,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=True,
        early_stopping=False,
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Drop any trailing partial sentence left by the max_length cut-off.
    last_period = summary.rfind(".")
    if last_period != -1:
        summary = summary[:last_period + 1]

    return summary
42
+
43
def abstract_to_audio(text):
    """Convert *text* to English speech with gTTS and return it as bytes.

    Args:
        text: The text to be spoken.

    Returns:
        The bytes gTTS wrote for the synthesized speech.
    """
    speech = gTTS(text, lang='en')
    sink = BytesIO()
    speech.write_to_fp(sink)
    # getvalue() returns the whole buffer regardless of the current position.
    return sink.getvalue()
49
+
50
def abstract_audio(article):
    """Run the full pipeline: PDF -> abstract summary -> spoken summary.

    Args:
        article: The PDF source, forwarded to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the summary produced by
        ``summarize_abstract`` and the result of ``abstract_to_audio`` on it.
    """
    summary = summarize_abstract(extract_text(article))
    return summary, abstract_to_audio(summary)
55
+
56
import os

# Example document offered in the UI. The path only exists in the environment
# the app was developed in, so it is registered only when actually present.
_EXAMPLE_PDF = "/content/NIPS-2015-hidden-technical-debt-in-machine-learning-systems-Paper.pdf"

# UI components: one uploaded file in, summary text and summary audio out.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Reuse the components defined above instead of creating unused duplicates.
myApp = gr.Interface(
    fn=abstract_audio,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="Summary of Abstract to Audio ",
    # The backslash is escaped so the displayed text is unchanged while the
    # invalid "\J" escape sequence (and its warning) is avoided.
    description=(
        "An App that helps you summarises the abstract of an Article\\Journal "
        "and gives the audio of the summary"
    ),
    examples=[_EXAMPLE_PDF] if os.path.exists(_EXAMPLE_PDF) else None,
)

myApp.launch()