persian-tts-playground

Runtime error

File size: 2,307 Bytes

4d9475f
4712ee3
 
4d9475f
4712ee3
4d9475f
 
 
4712ee3
4d9475f
 
e4b3e78
 
 
 
6ff371b
e4b3e78
 
 
4712ee3
d60ac5e
4712ee3
6ff371b
 
 
 
e4b3e78
6ff371b
 
 
 
 
 
 
 
 
d60ac5e
1981b8a
4712ee3
 
4d9475f
 
4712ee3
 
6ff371b
4d9475f
4712ee3
e4b3e78
4d9475f
4712ee3
4d9475f
 
 
 
d60ac5e
4712ee3
 
 
 
 
 
 
 
d60ac5e
 
4712ee3
 
 
 
f911972

import os
import tempfile
import gradio as gr
from TTS.utils.synthesizer import Synthesizer
from huggingface_hub import hf_hub_download

# Static configuration for the demo.
# Each MODEL_INFO row is laid out as
# [model name, checkpoint file, config file, Hub repository id].
MODEL_INFO = [
    [
        "vits-espeak-57000",
        "checkpoint_57000.pth",
        "config.json",
        "mhrahmani/persian-tts-vits-0",
    ],
]

# Labels shown in the model picker. They are written by hand rather than
# derived from MODEL_INFO, so keep both lists in step when adding a model.
MODEL_NAMES = ["vits checkpoint 57000"]

# Longest input (in characters) that is synthesized; longer text is truncated.
MAX_TXT_LEN = 400

# Hugging Face access token taken from the environment; None when unset.
TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN")

# Fetch the single checkpoint used by the demo, together with its config,
# from the Hugging Face Hub. Both calls return the local path of the file.
# NOTE: the files are requested explicitly here instead of looping over
# MODEL_INFO, so only this one model is ever downloaded.
repo_name = "mhrahmani/persian-tts-vits-0"
filename = "checkpoint_57000.pth"

model_file = hf_hub_download(
    repo_id=repo_name,
    filename=filename,
    use_auth_token=TOKEN,
)
config_file = hf_hub_download(
    repo_id=repo_name,
    filename="config.json",
    use_auth_token=TOKEN,
)


# Synthesizer shared by every request; created lazily by _get_synthesizer().
_synthesizer = None


def _get_synthesizer():
    """Return the shared Synthesizer, building it on first use."""
    global _synthesizer
    if _synthesizer is None:
        # Building a Synthesizer loads the checkpoint, so do it only once
        # instead of on every request.
        _synthesizer = Synthesizer(model_file, config_file)
    return _synthesizer


def synthesize(text: str, model_name: str) -> str:
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped and a notice is printed.
        model_name: Label picked in the UI. It is currently not used: only
            one checkpoint is downloaded, so every choice maps to it.

    Returns:
        Path of a temporary ``.wav`` file holding the audio. The file is
        created with ``delete=False`` and is not removed by this function.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front instead of handing it to the model.
    if not text or not text.strip():
        raise ValueError("Please enter some text to synthesize.")

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")

    synthesizer = _get_synthesizer()
    wavs = synthesizer.tts(text)

    # delete=False keeps the file on disk after the handle is closed so the
    # caller can still read it from the returned path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name


# Sentence used both as the text box default and as the clickable example.
_SAMPLE_TEXT = "زین همرهان سست عناصر، دلم گرفت."

# Input and output widgets, created in the order they appear in the UI.
_text_box = gr.Textbox(label="Enter Text to Synthesize:", value=_SAMPLE_TEXT)
_model_picker = gr.Radio(
    label="Pick a Model",
    choices=MODEL_NAMES,
    value=MODEL_NAMES[0],
)
_audio_out = gr.Audio(label="Output", type="filepath")

# Wire the widgets to synthesize(); live=False means it is not re-run on
# every keystroke.
iface = gr.Interface(
    fn=synthesize,
    inputs=[_text_box, _model_picker],
    outputs=_audio_out,
    examples=[[_SAMPLE_TEXT, MODEL_NAMES[0]]],
    title="Persian TTS Playground",
    description="Persian text to speech model demo",
    article="",
    live=False,
)

# Start the web server for the demo.
iface.launch()