"""Gradio app that summarises a PDF's abstract and reads the summary aloud.

The first page of the uploaded PDF is scanned for the text between the
words 'Abstract' and 'Introduction'. That text is summarised with a
Long-T5 model and the summary is converted to speech with SpeechT5.
"""

# Import Dependencies
import os
import re
import shutil
from functools import lru_cache

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from IPython.display import Audio
from PyPDF2 import PdfReader
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
    pipeline,
)

# Gradio needs a tmp directory for file store, creating manually
path = '/tmp/gradio/tmp1biredw9'
os.makedirs(path, exist_ok=True)

# Working directory of the app; uploads are copied here and the audio is
# written here.
APP_DIR = "/home/user/app/"
AUDIO_PATH = '/home/user/app/abstract.wav'

# Sample rate passed to the WAV writer and index of the speaker x-vector used.
SAMPLE_RATE = 16000
SPEAKER_INDEX = 7306

# Loading HuggingFace models
first_model = pipeline(
    task='summarization',
    model='pszemraj/long-t5-tglobal-base-16384-book-summary',
)
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")


@lru_cache(maxsize=1)
def _speaker_embeddings():
    """Load the speaker x-vector once and reuse it for every request."""
    dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    return torch.tensor(dataset[SPEAKER_INDEX]["xvector"]).unsqueeze(0)


@lru_cache(maxsize=1)
def _vocoder():
    """Load the HiFi-GAN vocoder once and reuse it for every request."""
    return SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")


def readAbstract(pdf):
    """Return the abstract text found on the first page of a PDF.

    Args:
        pdf: Path (or file object) accepted by ``PdfReader``.

    Returns:
        The first-page text starting at the word 'Abstract' and ending just
        before the first following occurrence of 'Introduction' (or at the
        end of the page when 'Introduction' does not occur).

    Raises:
        ValueError: If the PDF has no pages or the first page does not
            contain the word 'Abstract'.
    """
    reader = PdfReader(pdf)
    if len(reader.pages) == 0:
        raise ValueError("The PDF does not contain any pages.")

    # extract_text() may yield nothing for image-only pages.
    page_text = reader.pages[0].extract_text() or ""

    # Removing all before 'Abstract' for cleaning. str.find returns -1 when
    # the word is missing, which would silently slice off everything but the
    # last character, so fail loudly instead.
    start = page_text.find('Abstract')
    if start == -1:
        raise ValueError(
            "No 'Abstract' section was found on the first page of the PDF."
        )
    return page_text[start:].split('Introduction', 1)[0]


title = 'PDF Abstracter'
description = (
    'The model takes a PDF with an abstract as input and summarises it in '
    'one sentence that can be read and listened to.\n'
    'Please note that only PDFs with an abstract will work, otherwise there '
    'will be an error'
)


def abstract_summary(file):
    """Summarise the abstract of an uploaded PDF and synthesise it as speech.

    Args:
        file: The upload handed over by the Gradio "file" input. Either a
            path string or an object exposing the path as ``.name``.

    Returns:
        A ``(summary_text, wav_path)`` tuple matching the "text" and "audio"
        outputs of the interface.

    Raises:
        gr.Error: If no abstract can be extracted from the PDF.
    """
    # Resolve the upload to a path whether it is a string or a file object.
    source_path = getattr(file, "name", file)

    # Set file path for uploaded file
    file_path = os.path.join(APP_DIR, os.path.basename(source_path))
    shutil.copyfile(source_path, file_path)

    # Extract Abstract from PDF
    try:
        pdf = readAbstract(file_path)
    except ValueError as err:
        # gr.Error surfaces the message in the web UI.
        raise gr.Error(str(err)) from err

    # Run Summarisation Model; the pipeline returns a list with one dict per
    # input, holding the text under 'summary_text'.
    abstract = first_model(pdf)[0]["summary_text"]

    # Text to Speech model: passing the vocoder makes generate_speech return
    # the waveform directly, so one call is enough.
    inputs = processor(text=abstract, return_tensors="pt")
    with torch.no_grad():
        speech = model.generate_speech(
            inputs["input_ids"],
            _speaker_embeddings(),
            vocoder=_vocoder(),
        )

    # Create .wav audio file from above
    sf.write(AUDIO_PATH, speech.cpu().numpy(), samplerate=SAMPLE_RATE)
    return abstract, AUDIO_PATH


gui = gr.Interface(
    fn=abstract_summary,
    inputs=["file"],
    outputs=["text", "audio"],
    title=title,
    description=description,
)
gui.launch(debug=True)
gui.close()