# Spaces: Running on T4
import os
import tempfile

import gradio as gr
from TTS.api import TTS
# Identifier of the multilingual XTTS v1 checkpoint to load.
XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v1"

# Build the synthesiser once at import time (GPU requested); `predict` reuses
# this single module-level instance for every call.
tts = TTS(XTTS_MODEL_NAME, gpu=True)
def predict(prompt, language, audio_file_pth):
    """Synthesise ``prompt`` in the reference speaker's voice.

    Args:
        prompt: Text to be spoken.
        language: Language code passed through to the TTS model.
        audio_file_pth: Path to the reference recording of the target
            speaker, passed to the model as ``speaker_wav``.

    Returns:
        The value produced by ``gr.make_waveform`` for the synthesised
        audio, which the interface shows in its ``gr.Video`` output.
    """
    # Each call gets its own scratch file. A fixed "output.wav" would be
    # shared by all requests, so two overlapping calls could overwrite each
    # other's audio before the waveform is rendered.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; the TTS library opens it itself
    try:
        tts.tts_to_file(
            text=prompt,
            file_path=wav_path,
            speaker_wav=audio_file_pth,
            language=language,
        )
        return gr.make_waveform(audio=wav_path)
    finally:
        # The intermediate wav is no longer needed once the waveform has been
        # rendered (or synthesis has failed), so do not leave it behind.
        os.remove(wav_path)
# Heading shown at the top of the demo page.
title = "XTTS: MVP"

# Language codes offered in the dropdown.
SUPPORTED_LANGUAGES = [
    "en",
    "es",
    "fr",
    "de",
    "it",
    "pt",
    "pl",
    "tr",
    "ru",
    "nl",
    "cz",
    "ar",
    "zh",
]

# Input widgets, in the same order as the parameters of `predict`.
prompt_box = gr.Textbox(
    label="Text Prompt",
    info="One or two sentences at a time is better",
    placeholder="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
)
language_dropdown = gr.Dropdown(
    label="Language",
    info="Select an output language for the synthesised speech",
    choices=SUPPORTED_LANGUAGES,
    max_choices=1,
)
reference_audio = gr.Audio(
    label="Reference Audio",
    info="Upload a reference audio for target speaker voice",
    type="filepath",
)

# Single output widget: the waveform video returned by `predict`.
speech_video = gr.Video(label="Synthesised Speech")

# Assemble the interface and start serving it straight away.
demo = gr.Interface(
    fn=predict,
    inputs=[prompt_box, language_dropdown, reference_audio],
    outputs=[speech_video],
    title=title,
)
demo.launch(debug=True)