"""Gradio demo that turns Japanese text into speech with a pretrained model.

At import time the script reads hyper-parameters from
``./configs/jp_base.json`` and loads the checkpoint
``./logs/jp_base/model.pth`` into a ``SynthesizerTrn`` network. When run as a
script it then serves a one-textbox Gradio interface.
"""

import base64
import io
import tempfile

import torch
import scipy.io.wavfile as wavfile
import gradio as gr
import numpy as np
from PIL import Image

import commons
import utils
from models import SynthesizerTrn
from text.symbols import symbols
from text import text_to_sequence


def get_text(text, hps):
    """Convert raw text into a 1-D ``torch.LongTensor`` of symbol IDs.

    Args:
        text: The input string to synthesize.
        hps: Hyper-parameter object; ``hps.data.text_cleaners`` and
            ``hps.data.add_blank`` are read.

    Returns:
        A ``torch.LongTensor`` built from the output of ``text_to_sequence``.
    """
    sequence = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Presumably interleaves the blank ID 0 between symbols -- confirm
        # against ``commons.intersperse``.
        sequence = commons.intersperse(sequence, 0)
    return torch.LongTensor(sequence)


def text_to_speech(text, *, noise_scale=0.667, noise_scale_w=0.8,
                   length_scale=1.2):
    """Synthesize ``text`` and write the result to a temporary WAV file.

    Uses the module-level ``hps`` and ``net_g`` objects.

    Args:
        text: The input string to synthesize.
        noise_scale: Forwarded unchanged to ``net_g.infer``.
        noise_scale_w: Forwarded unchanged to ``net_g.infer``.
        length_scale: Forwarded unchanged to ``net_g.infer``.

    Returns:
        Path of the WAV file that was written. The file is created with
        ``delete=False`` and is NOT removed by this function; the caller (or
        the OS temp-directory cleanup) owns its lifetime.
    """
    stn_tst = get_text(text, hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0)  # add a leading batch dimension
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        outputs = net_g.infer(
            x_tst,
            x_tst_lengths,
            noise_scale=noise_scale,
            noise_scale_w=noise_scale_w,
            length_scale=length_scale,
        )
        # Element [0, 0] of the first output is used as the waveform
        # (presumably batch item 0, channel 0 -- confirm against
        # ``SynthesizerTrn.infer``). ``.cpu()`` is a no-op on CPU and keeps
        # the conversion valid if the model is ever moved to another device.
        audio = outputs[0][0, 0].detach().cpu().float().numpy()

    # Reserve a unique path, close our handle, and only then let scipy open
    # the file, so the same file is never open through two handles at once.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        audio_file = f.name
    wavfile.write(audio_file, hps.data.sampling_rate, audio)

    return audio_file


# Load the trained model
hps = utils.get_hparams_from_file("./configs/jp_base.json")
hps.model_dir = './logs/jp_base'
pretrained_model = f'{hps.model_dir}/model.pth'

net_g = SynthesizerTrn(
    len(symbols),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    **hps.model)
net_g.eval()
_ = utils.load_checkpoint(pretrained_model, net_g, None)


def generate_speech(text):
    """Gradio callback: synthesize ``text`` and return the WAV file path.

    Args:
        text: The contents of the input textbox.

    Returns:
        Path to the generated WAV file, or ``None`` when ``text`` is empty or
        whitespace-only, so that no empty sequence is fed to the model.
    """
    if not text or not text.strip():
        return None
    return text_to_speech(text)


# Define the interface for the text-to-speech model.
# The top-level component classes are used because the ``gr.inputs`` /
# ``gr.outputs`` namespaces are deprecated in Gradio 3.x and removed in 4.x.
text_input = gr.Textbox(label='Enter Text Here')
output_audio = gr.Audio(label='Speech', type='filepath')

# Define the user interface using Gradio
ui = gr.Interface(
    fn=generate_speech,
    inputs=text_input,
    outputs=output_audio,
    title='Text-to-Speech for Japanese Demo',
    description='Generate speech from japanese text using a text-to-speech model.'
)

# Run the interface only when executed as a script, so importing this module
# does not start a blocking web server.
if __name__ == "__main__":
    ui.launch()