# Import Dependencies
import functools
import os
import re
import shutil
import tempfile

import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from IPython.display import Audio
from PyPDF2 import PdfReader
from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Pre-create the temp directory that Gradio is expected to use as its file
# store, so it already exists before the app starts.
import os
path = '/tmp/gradio/tmp1biredw9'
os.makedirs(name=path, exist_ok=True)

# Load the Hugging Face models once at import time:
#   first_model -> summarisation pipeline
#   processor   -> SpeechT5 text pre-processor
#   model       -> SpeechT5 text-to-speech model
first_model = pipeline(
    task='summarization',
    model='pszemraj/long-t5-tglobal-base-16384-book-summary',
)
processor, model = (
    SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),
    SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),
)

def readAbstract(pdf):
    """Return the abstract section from the first page of a PDF.

    The text of the first page is extracted, everything before the first
    occurrence of 'Abstract' is dropped, and everything from the first
    occurrence of 'Introduction' onwards is cut off.

    Args:
        pdf: Whatever ``PdfReader`` accepts as its source (the caller in this
            file passes a file path).

    Returns:
        The text between 'Abstract' and 'Introduction'. If 'Abstract' does
        not occur on the first page, the page text from its beginning up to
        'Introduction' is returned instead.
    """
    reader = PdfReader(pdf)
    # Only the first page is inspected.
    text = reader.pages[0].extract_text()

    # str.find returns -1 when the marker is missing; slicing with -1 would
    # silently keep only the last character of the page, so slice only when
    # the marker was actually found.
    start = text.find('Abstract')
    if start != -1:
        text = text[start:]

    # Keep only what precedes the first 'Introduction' heading.
    return text.split('Introduction', 1)[0]

# Heading and explanatory blurb handed to the Gradio interface.
title = "PDF Abstracter"
description = (
    "The model takes a PDF with an abstract as input and summarises it in one "
    "sentence that can be read and listened to. Please note that only PDFs with "
    "an abstract will work, otherwise there will be an error"
)
@functools.lru_cache(maxsize=1)
def _load_vocoder():
    """Load the SpeechT5 HiFi-GAN vocoder once and reuse it on later calls."""
    return SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")


@functools.lru_cache(maxsize=1)
def _load_speaker_embeddings():
    """Load the speaker x-vector once and reuse it on later calls."""
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    # Row 7306 of the validation split, with a leading batch dimension added.
    return torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)


def abstract_summary(file):
    """Summarise the abstract of an uploaded PDF and render the summary as speech.

    Args:
        file: The uploaded PDF as delivered by the Gradio "file" input. Either
            a path string or an object exposing the path as ``.name`` is
            accepted.

    Returns:
        A ``(summary, wav_path)`` tuple: the summary text and the path of a
        WAV file (written at a 16000 Hz rate) containing the spoken summary.
    """
    # Accept both a plain path and a temp-file style object with `.name`, and
    # read the upload where it already is instead of copying it first.
    upload_path = getattr(file, "name", file)

    # Extract Abstract from PDF
    abstract_text = readAbstract(upload_path)

    # Run Summarisation Model. The pipeline result is a list with one dict per
    # input; take the text straight from it rather than stringifying the list
    # and stripping brackets/quotes, which also removed apostrophes that are
    # part of the summary itself.
    summary = first_model(abstract_text)[0]["summary_text"]

    # Text to Speech model. The vocoder is passed in so a single call yields
    # the waveform that is encoded below.
    inputs = processor(text=summary, return_tensors="pt")
    speech = model.generate_speech(
        inputs["input_ids"],
        _load_speaker_embeddings(),
        vocoder=_load_vocoder(),
    )

    # Create .wav audio file. A unique temp file per call keeps concurrent
    # requests from overwriting each other's audio; delete=False because the
    # file has to outlive this function so Gradio can read it.
    audio = Audio(speech, rate=16000)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_file.write(audio.data)

    return summary, wav_file.name

# Build the Gradio UI around abstract_summary: one file input, with a text and
# an audio output.
gui = gr.Interface(
    fn=abstract_summary,
    inputs=["file"],
    outputs=["text", "audio"],
    title=title,
    description=description,
)
gui.launch(debug=True)

# Shut the interface down once launch() has returned.
gui.close()