Spaces:

MHayden
/

opit_assignment

Sleeping

App Files Files Community

opit_assignment / app.py

MHayden

Update app.py

0717365 12 months ago

raw

history blame contribute delete

2.77 kB

	# Import Dependencies
	from PyPDF2 import PdfReader
	from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
	import torch
	import soundfile as sf
	from IPython.display import Audio
	from datasets import load_dataset
	import gradio as gr
	import os, re
	import shutil

	# Per the original author's note, Gradio needs a tmp directory for its file
	# store, so the directory is created up front instead of on first use.
	import os

	path = '/tmp/gradio/tmp1biredw9'
	# exist_ok=True makes this a no-op when the directory is already present.
	os.makedirs(path, exist_ok=True)

	# Models are loaded once at import time and shared by every request.
	# `first_model` is the summarisation pipeline; `processor` and `model` are the
	# SpeechT5 text-to-speech pair, both created from the same checkpoint name.
	first_model = pipeline(
	    task='summarization',
	    model='pszemraj/long-t5-tglobal-base-16384-book-summary',
	)
	_TTS_CHECKPOINT = "microsoft/speecht5_tts"
	processor = SpeechT5Processor.from_pretrained(_TTS_CHECKPOINT)
	model = SpeechT5ForTextToSpeech.from_pretrained(_TTS_CHECKPOINT)

	def readAbstract(pdf):
	    """Return the abstract section found on the first page of a PDF.

	    The first page's text is trimmed so that it starts at the first
	    occurrence of 'Abstract' and stops just before the next occurrence of
	    'Introduction' (if there is one).

	    Args:
	        pdf: Passed straight to ``PdfReader``; the caller in this file
	            supplies a file path.

	    Returns:
	        The abstract text, beginning with the word 'Abstract'.

	    Raises:
	        ValueError: If the PDF has no pages, or the first page does not
	            contain the word 'Abstract'.
	    """
	    reader = PdfReader(pdf)
	    if len(reader.pages) == 0:
	        raise ValueError('The PDF does not contain any pages')

	    # `or ''` keeps the search below safe if no text comes back for the page.
	    first_page_text = reader.pages[0].extract_text() or ''

	    start = first_page_text.find('Abstract')
	    if start == -1:
	        # str.find signals a miss with -1; slicing with that value would
	        # silently keep only the last character of the page instead of failing.
	        raise ValueError("No 'Abstract' section found on the first page of the PDF")

	    # Slice first, then cut at 'Introduction', so an 'Introduction' that
	    # appears before the abstract heading cannot truncate the result.
	    return first_page_text[start:].split('Introduction', 1)[0]

	# Texts shown in the Gradio interface header.
	title = 'PDF Abstracter'
	description = (
	    'The model takes a PDF with an abstract as input and summarises it in '
	    'one sentence that can be read and listened to. '
	    'Please note that only PDFs with an abstract will work, '
	    'otherwise there will be an error'
	)
	# Where the generated speech is written; the path is returned to Gradio.
	# NOTE(review): a single fixed file is shared by all requests, so concurrent
	# users can overwrite each other's audio - kept to preserve the return value.
	_OUTPUT_WAV_PATH = '/home/user/app/abstract.wav'

	# Lazily filled cache so the speaker embedding and vocoder are loaded once
	# per process instead of on every request.
	_tts_cache = {}


	def _get_tts_resources():
	    """Return ``(speaker_embeddings, vocoder)``, loading them on first use."""
	    if not _tts_cache:
	        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
	        speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
	        vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
	        # Populate in one step so a failed load never leaves a half-filled cache.
	        _tts_cache.update(speaker_embeddings=speaker_embeddings, vocoder=vocoder)
	    return _tts_cache["speaker_embeddings"], _tts_cache["vocoder"]


	def abstract_summary(file):
	    """Summarise the abstract of an uploaded PDF and synthesise it as speech.

	    Args:
	        file: The uploaded PDF, either a path string or an object exposing
	            the path as ``.name`` (the previous code relied on both forms).

	    Returns:
	        A ``(summary, audio_path)`` tuple: the summary text and the path of
	        the WAV file containing the spoken summary.

	    Raises:
	        ValueError: If no file was provided. Errors raised while reading the
	            PDF or running the models propagate unchanged.
	    """
	    if file is None:
	        raise ValueError('No PDF file was provided')

	    # Read the upload where it already is. Copying it into the app directory
	    # under its client-supplied name would let an upload called e.g. 'app.py'
	    # or 'abstract.wav' overwrite the application's own files.
	    pdf_path = getattr(file, "name", file)

	    # Extract Abstract from PDF
	    abstract_text = readAbstract(pdf_path)

	    # Run Summarisation Model. The pipeline result is a list of dicts keyed by
	    # 'summary_text'; reading the field directly (instead of stripping
	    # punctuation from its repr) keeps apostrophes and brackets in the summary.
	    summary = first_model(abstract_text)[0]["summary_text"]

	    # Text to Speech model: one pass that goes straight to a waveform.
	    inputs = processor(text=summary, return_tensors="pt")
	    speaker_embeddings, vocoder = _get_tts_resources()
	    with torch.no_grad():
	        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

	    # Create .wav audio file from above; Audio(...).data holds the encoded bytes.
	    wav = Audio(speech, rate=16000)
	    with open(_OUTPUT_WAV_PATH, 'wb') as f:
	        f.write(wav.data)

	    return summary, _OUTPUT_WAV_PATH

	# Build the web UI: one file input, a text output and an audio output,
	# matching the (summary, audio path) pair returned by abstract_summary.
	gui = gr.Interface(
	    fn=abstract_summary,
	    inputs=["file",],
	    outputs=["text", "audio"],
	    title=title,
	    description=description,
	)
	gui.launch(debug=True)

	# Runs once launch() has returned.
	gui.close()