import base64
import io
import os
import subprocess
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile
import torch
from PIL import Image

import commons
import utils
from models import SynthesizerTrn
from text import text_to_sequence
from text.symbols import symbols


def get_text(text, hps):
    # Convert raw text into a sequence of symbol ids using the configured cleaners.
    text_norm = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Intersperse a blank token (id 0) between symbols, as in the VITS recipe.
        text_norm = commons.intersperse(text_norm, 0)
    text_norm = torch.LongTensor(text_norm)
    return text_norm
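
# Illustrative example (the symbol ids below are made up, not taken from the real
# symbol table): with add_blank enabled, a cleaned sequence such as [23, 51, 7]
# is interspersed with blanks to [0, 23, 0, 51, 0, 7, 0] before being wrapped
# in a LongTensor.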


def text_to_speech(text):
    stn_tst = get_text(text, hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        # noise_scale / noise_scale_w control sampling variability;
        # length_scale > 1 slows the speech down slightly.
        audio = net_g.infer(x_tst, x_tst_lengths, noise_scale=.667, noise_scale_w=0.8,
                            length_scale=1.2)[0][0, 0].data.float().numpy()
    # Write the waveform to a temporary .wav file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wavfile.write(f.name, hps.data.sampling_rate, audio)
        audio_file = f.name
    # Return the audio file path
    return audio_file


# Load the trained model
hps = utils.get_hparams_from_file("./configs/jp_base.json")
hps.model_dir = './logs/jp_base'
pretrained_model = f'{hps.model_dir}/model.pth'

net_g = SynthesizerTrn(
    len(symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    **hps.model)
_ = net_g.eval()

if os.path.isfile(pretrained_model):
    _ = utils.load_checkpoint(pretrained_model, net_g, None)
else:
    # Run the shell script
    subprocess.call('./startup.sh', shell=True)
    _ = utils.load_checkpoint(pretrained_model, net_g, None)
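
# Note: startup.sh is assumed to download or assemble the checkpoint at
# ./logs/jp_base/model.pth; if it does not produce that file, the
# load_checkpoint call above will fail before the interface is built.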


# Define the function that will be used to generate speech from text
def generate_speech(text):
    # Use the text_to_speech function to generate speech from text
    speech = text_to_speech(text)
    # Return the path to the generated audio file
    return speech


# Interface components (the old gr.inputs / gr.outputs namespaces are deprecated,
# so the component classes are used directly)
text_input = gr.Textbox(label='Enter Text Here')
output_audio = gr.Audio(label='Speech', type='filepath')

# Define the user interface using Gradio
ui = gr.Interface(
    fn=generate_speech,
    inputs=text_input,
    outputs=output_audio,
    title='Text-to-Speech Demo',
    description='Generate speech from text using a text-to-speech model.'
)

# Run the interface
ui.launch(share=True)
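
# Usage sketch (assumed layout: this file saved as app.py, with ./configs/jp_base.json
# and either ./logs/jp_base/model.pth or a working ./startup.sh alongside it):
# running `python app.py` loads the VITS checkpoint and serves the Gradio demo;
# text entered in the textbox is synthesized to a temporary .wav file whose path
# is handed to the Audio component for playback.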