Spaces:

vividsd
/

practice

Build error

practice / app.py

Update app.py

814bb5f 10 months ago

No virus

1.39 kB

	# imports

	import re

	import gradio as gr
	import PyPDF2
	import torch
	from transformers import pipeline

	# function to read the uploaded PDF and extract its abstract, when present, by searching for the keyword "abstract:".
	# If the PDF doesn't contain "abstract:" (in any letter case), it won't work
	# also, I'm trying to limit the result to the abstract itself, not other sections, by stopping at the first blank line after it

	def process_pdf(pdf) -> str:
	    """Return the abstract section of an uploaded PDF.

	    The text of every page is concatenated and searched for the marker
	    "abstract:" (ASCII letters, any case). The returned string starts at
	    that marker and runs up to, but not including, the first blank line
	    ("\n\n") after it, or to the end of the text if there is none.

	    Args:
	        pdf: The uploaded file. Either an object exposing the file path as
	            ``.name`` or the path itself as a string. ``None`` means
	            nothing was uploaded.

	    Returns:
	        The abstract text (including the "abstract:" marker),
	        "Abstract not found." when the marker is absent, or
	        "No PDF uploaded." when ``pdf`` is ``None``.
	    """
	    if pdf is None:
	        # Report a missing upload instead of failing on `None.name`.
	        return "No PDF uploaded."

	    # Fall back to the value itself when there is no `.name` attribute,
	    # so a plain path string works as well as a file wrapper.
	    path = getattr(pdf, "name", pdf)

	    with open(path, "rb") as f:
	        reader = PyPDF2.PdfReader(f)
	        # Extraction must happen while the file is still open. The
	        # `or ""` keeps the join safe if a page yields no text.
	        text = "".join(page.extract_text() or "" for page in reader.pages)

	    return _extract_abstract(text)


	def _extract_abstract(text):
	    """Slice the abstract out of already-extracted PDF text."""
	    # Search the original text case-insensitively rather than a lowercased
	    # copy: lowercasing can change the string length for some characters,
	    # which would make offsets found in the copy wrong for `text`.
	    marker = re.search(r"abstract:", text, flags=re.IGNORECASE | re.ASCII)
	    if marker is None:
	        return "Abstract not found."

	    start = marker.start()
	    # The first blank line after the marker is treated as the abstract's end.
	    end = text.find("\n\n", start)
	    return text[start:end] if end != -1 else text[start:]

	# Build the Gradio interface: one uploaded PDF in, the extracted abstract out as text.

	interface = gr.Interface(
	    fn=process_pdf,
	    # Use the top-level gr.File component: the legacy gr.inputs namespace
	    # was deprecated in Gradio 3 and removed in Gradio 4, and
	    # type="file" is rejected by newer releases. The default type still
	    # gives process_pdf a value whose path is available via `.name`.
	    inputs=gr.File(label="Upload PDF"),
	    outputs="text",
	    title="Summarizing outloud",
	    description="Extract abstracts from PDFs, summarize then in 1 sentence and get an audio of it",
	    # Example files are referenced by relative path.
	    examples=[["example_pdf1.pdf"], ["example_pdf2.pdf"]],
	)

	# Start the web server only when run as a script, not when imported.
	if __name__ == "__main__":
	    interface.launch()