Spaces:
Runtime error
Runtime error
File size: 2,216 Bytes
d1ceaed a377fc8 d1ceaed a377fc8 d1ceaed a377fc8 d1ceaed 4bb1137 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import base64
import torch
import io
import tempfile
import scipy.io.wavfile as wavfile
import commons
import utils
import gradio as gr
import numpy as np
from PIL import Image
from models import SynthesizerTrn
from text.symbols import symbols
from text import text_to_sequence
def get_text(text, hps):
    """Turn raw text into a ``torch.LongTensor`` of symbol ids.

    Args:
        text: The input string to convert.
        hps: Hyper-parameter object; ``hps.data.text_cleaners`` is passed to
            ``text_to_sequence`` and ``hps.data.add_blank`` toggles the
            interspersing step below.

    Returns:
        A ``torch.LongTensor`` built from the resulting id sequence.
    """
    token_ids = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # presumably places the id 0 between symbols -- confirm in commons.intersperse
        token_ids = commons.intersperse(token_ids, 0)
    return torch.LongTensor(token_ids)
def text_to_speech(text):
    """Synthesize ``text`` and return the path of the WAV file that was written.

    Uses the module-level ``net_g`` model and ``hps`` hyper-parameters.

    Args:
        text: The string to synthesize.

    Returns:
        Path (``str``) of a newly created temporary ``.wav`` file. The file is
        created with ``delete=False``, so it is not removed automatically.
    """
    token_ids = get_text(text, hps)

    with torch.no_grad():
        batch = token_ids.unsqueeze(0)
        batch_lengths = torch.LongTensor([token_ids.size(0)])
        outputs = net_g.infer(
            batch,
            batch_lengths,
            noise_scale=.667,
            noise_scale_w=0.8,
            length_scale=1.2,
        )
        # The first element of the result, indexed at [0, 0], is the audio to save.
        waveform = outputs[0][0, 0].data.float().numpy()

    # delete=False keeps the file on disk after the handle closes, so the
    # returned path stays valid for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_handle:
        wav_path = wav_handle.name
        wavfile.write(wav_path, hps.data.sampling_rate, waveform)

    return wav_path
# Read the hyper-parameters, build the synthesizer and restore its weights.
hps = utils.get_hparams_from_file("./configs/jp_base.json")
hps.model_dir = './logs/jp_base'
pretrained_model = f'{hps.model_dir}/model.pth'

# Positional constructor arguments derived from the config.
# NOTE(review): their meaning is defined by SynthesizerTrn's signature in
# models.py, which is not visible here -- confirm before renaming.
n_symbols = len(symbols)
filter_arg = hps.data.filter_length // 2 + 1
segment_arg = hps.train.segment_size // hps.data.hop_length

net_g = SynthesizerTrn(n_symbols, filter_arg, segment_arg, **hps.model)

# Return values of both calls are deliberately discarded.
_ = net_g.eval()
# Third argument is None -- presumably "no optimizer to restore"; verify in utils.
_ = utils.load_checkpoint(pretrained_model, net_g, None)
# Callback handed to the Gradio interface.
def generate_speech(text):
    """Return whatever ``text_to_speech`` produces for ``text``.

    Args:
        text: The string entered by the user.

    Returns:
        The value returned by ``text_to_speech(text)``, unchanged.
    """
    return text_to_speech(text)
# Input and output components for the text-to-speech demo.
# The legacy `gr.inputs.*` / `gr.outputs.*` namespaces were deprecated in
# Gradio 3 and removed in Gradio 4; the top-level component classes take the
# same keyword arguments and work on both.
text_input = gr.Textbox(label='Enter Text Here')
# type='filepath': the callback returns a path to an audio file on disk.
output_audio = gr.Audio(label='Speech', type='filepath')

# Wire the callback and the components into a single-function interface.
ui = gr.Interface(
    fn=generate_speech,
    inputs=text_input,
    outputs=output_audio,
    title='Text-to-Speech Demo',
    description='Generate speech from text using a text-to-speech model.',
)

# Start the web server for the interface.
ui.launch()
|