# imports
import re

import gradio as gr
from transformers import pipeline
import torch
import PyPDF2

# Marker that introduces the abstract. It is matched case-insensitively on the
# ORIGINAL text (ASCII-only folding) so that the match offset is always a valid
# index into that text; offsets taken from ``text.lower()`` can drift because
# lower-casing some Unicode characters changes the string length.
_ABSTRACT_MARKER = re.compile(r"abstract:", re.IGNORECASE | re.ASCII)

# Message returned when the marker does not occur anywhere in the document.
_ABSTRACT_NOT_FOUND = "Abstract not found."


def _extract_abstract(text: str) -> str:
    """Return the abstract section of *text*, or a not-found message.

    The abstract starts at the first case-insensitive occurrence of
    ``"abstract:"`` (the marker itself is included in the result) and ends
    just before the first blank line (``"\\n\\n"``) that follows it. When no
    blank line follows, everything up to the end of *text* is returned.

    Args:
        text: Plain text extracted from the document.

    Returns:
        The abstract, or ``"Abstract not found."`` when the marker is absent.
    """
    match = _ABSTRACT_MARKER.search(text)
    if match is None:
        return _ABSTRACT_NOT_FOUND

    start = match.start()
    # A blank line is used as the boundary so the following sections of the
    # document are not pulled into the abstract.
    end = text.find("\n\n", start)
    if end == -1:
        return text[start:]
    return text[start:end]


def process_pdf(pdf) -> str:
    """Read the uploaded PDF and extract its abstract.

    The text of every page is concatenated and searched for the keyword
    ``"abstract:"``; a PDF that does not contain that keyword yields
    ``"Abstract not found."``.

    Args:
        pdf: Uploaded file object; only its ``name`` attribute (the path of
            the file on disk) is used.

    Returns:
        The extracted abstract, or ``"Abstract not found."``.
    """
    with open(pdf.name, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Extraction must happen while the file is still open. ``or ""``
        # guards against pages for which no text is returned.
        text = "".join(page.extract_text() or "" for page in reader.pages)
    return _extract_abstract(text)


# Now creating the interface to read the PDFs.
# NOTE(review): ``gr.inputs.File`` is the legacy Gradio input namespace; newer
# Gradio releases expose ``gr.File`` instead - confirm against the pinned
# Gradio version before upgrading.
interface = gr.Interface(
    fn=process_pdf,
    inputs=gr.inputs.File(type="file", label="Upload PDF"),
    outputs="text",
    title="Summarizing outloud",
    description="Extract abstracts from PDFs, summarize then in 1 sentence and get an audio of it",
    examples=[["example_pdf1.pdf"], ["example_pdf2.pdf"]],
)

if __name__ == "__main__":
    interface.launch()