Spaces:

burraco135
/

TTS-voxpopuli-it

Runtime error

File size: 1,086 Bytes

06aba51
7e2ad98
 
 
06aba51
7e2ad98
280610d
7e2ad98
 
 
 
 
 
280610d
5b52d90
 
280610d
7e2ad98
5b52d90
 
 
7e2ad98

import gradio as gr
import librosa
import numpy as np
import torch

from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Checkpoint that supplies both the text processor and the acoustic
# (text -> spectrogram) model. Judging by its name it is a SpeechT5 model
# fine-tuned on the Italian VoxPopuli data.
checkpoint = "burraco135/speecht5_finetuned_voxpopuli_it"
processor = SpeechT5Processor.from_pretrained(checkpoint)
model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)

# The vocoder converts the model's spectrogram into a waveform, so it must be
# a SpeechT5HifiGan loaded from the HiFi-GAN weights. Loading a
# SpeechT5Processor here (as the code previously did) yields an object that
# cannot be used as the `vocoder=` argument of `generate_speech`.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

def predict(text, speaker=None):
    """Synthesize speech for *text* and return it in Gradio's numpy audio format.

    Args:
        text: The text to speak.
        speaker: Currently unused; the speaker embedding is always read from
            ``speaker_0_embeddings.npy``. It defaults to ``None`` so the
            function can be called with the text alone.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 16000 and
        ``samples`` is an int16 NumPy array.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if text is None or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    inputs = processor(text=text, return_tensors="pt")

    # Truncate the token sequence to the longest input the model accepts.
    input_ids = inputs["input_ids"][..., : model.config.max_text_positions]

    # Load the stored speaker embedding from disk; passing the filename
    # string itself to torch would fail. float32 matches the model weights.
    speaker_embedding = torch.as_tensor(
        np.load("speaker_0_embeddings.npy"), dtype=torch.float32
    )
    # generate_speech expects a leading batch dimension. Add it only when the
    # file holds a single flat vector (assumed to be the usual case -- TODO
    # confirm the saved shape).
    if speaker_embedding.dim() == 1:
        speaker_embedding = speaker_embedding.unsqueeze(0)

    speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)

    # Scale the floating-point waveform to 16-bit PCM for the Audio output.
    speech = (speech.detach().cpu().numpy() * 32767).astype(np.int16)
    return (16000, speech)

# Build the web UI -- a single text box in, a single audio player out -- and
# start serving it right away.
gr.Interface(
    predict,
    gr.Text(label="Input Text"),
    gr.Audio(label="Generated Speech", type="numpy"),
).launch()