MeloTTS

Runtime error

File size: 1,461 Bytes

4300fed
 
e383d75
19dca80
4300fed
 
 
886cf85
 
 
 
 
 
19dca80
 
d798cbb
19dca80
372bdc9
e383d75
509f50e
 
19dca80
406e977
 
 
19dca80
4300fed
 
e383d75
 
 
376a452
b8401e1
4300fed
 
 
dc00cc0
7a62809

import gradio as gr
import os, torch, io
import sys
#os.system('python -m unidic download')
from melo.api import TTS
# Module-level default speed. NOTE: this name is rebound to the "Speed" slider
# component when the UI is built further down in this file.
speed = 1.0
import tempfile

# Pick the torch backend to run on: CUDA first, then MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Language codes; not referenced anywhere else in the visible part of the file.
languages = ["EN", "ES", "FR", "ZH", "JP", "KR"]
# Speaker names offered in the speaker dropdown when LANG == "EN".
en = ["EN-Default", "EN-US", "EN-BR", "EN_INDIA", "EN-AU"]

# Language code served by this process, read from the first command-line
# argument. A missing (or blank) argument falls back to "EN" instead of
# crashing at import time with IndexError. The value is upper-cased because the
# rest of this file compares against upper-case codes (e.g. LANG == "EN").
_cli_lang = sys.argv[1].strip() if len(sys.argv) > 1 else ""
LANG = _cli_lang.upper() or "EN"

# Loaded models keyed by (language, device), so the model is built once and
# then reused by every request instead of being re-created per button click.
_MODEL_CACHE = {}


def _get_model():
    """Return the TTS model for the configured LANG/device, loading it on first use."""
    key = (LANG, device)
    model = _MODEL_CACHE.get(key)
    if model is None:
        # Two concurrent first requests may both load the model; that is no
        # worse than the previous load-per-request behaviour, and the last
        # writer simply wins.
        model = TTS(language=LANG, device=device)
        _MODEL_CACHE[key] = model
    return model


def synthesize(speaker, text, speed=1.0, progress=gr.Progress()):
    """Synthesize ``text`` with the given speaker and return the audio bytes.

    Args:
        speaker: Speaker name; must be a key of the model's ``hps.data.spk2id``
            mapping (a missing key raises ``KeyError``).
        text: Text to be spoken.
        speed: Speed value forwarded unchanged to ``tts_to_file``.
        progress: Gradio progress tracker; its ``tqdm`` wrapper is handed to
            the model as ``pbar``.

    Returns:
        The bytes the model wrote to an in-memory buffer, requested in
        ``'wav'`` format.
    """
    model = _get_model()
    speaker_id = model.hps.data.spk2id[speaker]
    with io.BytesIO() as buffer:
        model.tts_to_file(
            text,
            speaker_id,
            buffer,
            speed=speed,
            pbar=progress.tqdm,
            format='wav',
        )
        # getvalue() must be read before the buffer is closed.
        return buffer.getvalue()

# English exposes the list of named speakers; for any other language the
# dropdown offers a single entry equal to the language code itself.
_speaker_choices = en if LANG == "EN" else [LANG]
_default_speaker = 'EN-Default' if LANG == "EN" else LANG

# Page layout: grouped inputs (speaker, speed, text), then the trigger button
# and the audio output that receives whatever synthesize() returns.
with gr.Blocks() as demo:
    with gr.Group():
        speaker = gr.Dropdown(
            _speaker_choices,
            interactive=True,
            value=_default_speaker,
            label='Speaker',
        )
        speed = gr.Slider(
            label='Speed',
            minimum=0.1,
            maximum=10.0,
            value=1.0,
            interactive=True,
            step=0.1,
        )
        text = gr.Textbox(
            label="Text to speak",
            value='The field of text to speech has seen rapid development recently',
        )
    btn = gr.Button('Synthesize', variant='primary')
    aud = gr.Audio(interactive=False)
    # Input order matches synthesize(speaker, text, speed).
    btn.click(
        synthesize,
        inputs=[speaker, text, speed],
        outputs=[aud],
    )

# Enable the request queue, then start the server with the API page hidden.
demo.queue(
    api_open=False,
    default_concurrency_limit=10,
).launch(show_api=False)