Spaces:

burraco135
/

TTS-voxpopuli-it

Runtime error

File size: 1,072 Bytes

06aba51
7e2ad98
 
 
06aba51
7e2ad98
280610d
7e2ad98
 
 
 
 
6be449f
 
7e2ad98
280610d
 
7e2ad98

import gradio as gr
import librosa
import numpy as np
import torch

from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Hub identifier of the fine-tuned SpeechT5 checkpoint; both the text
# processor and the text-to-speech model are loaded from it.
checkpoint = "burraco135/speecht5_finetuned_voxpopuli_it"
processor = SpeechT5Processor.from_pretrained(checkpoint)
model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)

# The vocoder converts the spectrogram produced by the model into a waveform.
# It has to be a SpeechT5HifiGan instance: a SpeechT5Processor is a
# tokenizer/feature-extractor wrapper and cannot be used as the `vocoder`
# argument of `generate_speech`.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Relative path of the NumPy (.npy) file holding the speaker embedding.
speaker_embedding = "speaker_0_embeddings.npy"

def predict(text, speaker=None):
    """Synthesize speech for ``text`` and return it as 16-bit PCM audio.

    Args:
        text: The text to speak. ``None`` or whitespace-only input produces
            an empty clip without running the model.
        speaker: Currently unused; accepted for backward compatibility. It
            defaults to ``None`` so the function can be called with the text
            alone, which is how the single-input Gradio interface calls it.

    Returns:
        A ``(sample_rate, waveform)`` tuple where ``sample_rate`` is 16000
        and ``waveform`` is a NumPy ``int16`` array.
    """
    sample_rate = 16000

    # Nothing to synthesize: hand back an empty clip rather than feeding an
    # empty token sequence to the model.
    if text is None or not text.strip():
        return (sample_rate, np.zeros(0, dtype=np.int16))

    inputs = processor(text=text, return_tensors="pt")

    # limit input length
    input_ids = inputs["input_ids"][..., :model.config.max_text_positions]

    # The module-level `speaker_embedding` is a file path, so the array has to
    # be loaded from disk. A distinct local name is used on purpose: assigning
    # to `speaker_embedding` here would shadow the global and raise
    # UnboundLocalError when it is read.
    embedding = torch.as_tensor(np.load(speaker_embedding), dtype=torch.float32)
    if embedding.dim() == 1:
        # Add the leading batch dimension when the file stores a flat vector.
        embedding = embedding.unsqueeze(0)

    speech = model.generate_speech(input_ids, embedding, vocoder=vocoder)

    # Clip before scaling so out-of-range samples saturate instead of
    # wrapping around when cast to int16.
    samples = np.clip(speech.numpy(), -1.0, 1.0)
    return (sample_rate, (samples * 32767).astype(np.int16))

# Build the web UI: a single text box feeds predict(), whose result is shown
# in an audio player. The app is started as soon as it is constructed.
text_input = gr.Text(label="Input Text")
audio_output = gr.Audio(label="Generated Speech", type="numpy")

demo = gr.Interface(
    fn=predict,
    inputs=[text_input],
    outputs=[audio_output],
)
demo.launch()