"""Streamlit playground for Persian text-to-speech models (Coqui TTS).

On start-up the script downloads every model listed in ``MODEL_INFO`` into a
directory named after the model, then serves a small UI that synthesizes the
entered text with the selected model.
"""

import os
import tempfile
from typing import Optional

import streamlit as st
from TTS.config import load_config
from TTS.utils.download import download_url
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# Define constants
MAX_TXT_LEN = 800
MODEL_INFO = [
    # ["Model Name", "Model File", "Config File", "URL"]
    # Add other models in the same format.
    # The URL must be the raw-file base ("/resolve/<revision>/"); the
    # "/tree/<revision>" form is the HTML browser page, not the file itself.
    [
        "vits-espeak-57000",
        "checkpoint_57000.pth",
        "config.json",
        "https://huggingface.co/mhrahmani/persian-tts-vits-0/resolve/main/",
    ],
    # ...
]


def _file_url(base_url: str, filename: str) -> str:
    """Return the direct download URL of *filename* under *base_url*.

    Tolerates a missing trailing slash and Hugging Face "tree" (browser)
    URLs, which are rewritten to their "resolve" (raw file) equivalent.
    """
    base = base_url.replace("/tree/", "/resolve/", 1).rstrip("/")
    return f"{base}/{filename}"


def _find_model(model_name: str) -> Optional[list]:
    """Return the ``MODEL_INFO`` entry named *model_name*, or ``None``."""
    for entry in MODEL_INFO:
        if entry[0] == model_name:
            return entry
    return None


# Download models
def download_models() -> None:
    """Download checkpoint and config of every model in ``MODEL_INFO``.

    Files go into a directory named after the model. Files that are already
    present are skipped, because Streamlit re-executes this script on every
    user interaction and the files must not be fetched again each time.
    """
    for model_name, model_file, config_file, url in MODEL_INFO:
        directory = model_name
        os.makedirs(directory, exist_ok=True)
        # The config is always stored locally as "config.json", which is the
        # name synthesize_speech() looks for.
        targets = (
            (str(model_file), str(model_file)),
            (str(config_file), "config.json"),
        )
        for remote_name, local_name in targets:
            if os.path.exists(os.path.join(directory, local_name)):
                continue
            download_url(_file_url(url, remote_name), directory, local_name)


# Load a model and perform TTS
def synthesize_speech(text, model_name):
    """Synthesize *text* with the model *model_name* and save it as a WAV.

    Text longer than ``MAX_TXT_LEN`` characters is truncated (with a UI
    warning).

    Returns:
        Path of a temporary ``.wav`` file holding the audio, or ``None`` when
        the text is empty or the model files are unavailable. The caller owns
        the file and is responsible for deleting it.
    """
    if not text or not text.strip():
        st.warning("Please enter some text to synthesize.")
        return None

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        st.warning(f"Input text was truncated to {MAX_TXT_LEN} characters.")

    # Use the checkpoint file name that download_models() actually stored
    # instead of a hard-coded "best_model.pth".
    model_entry = _find_model(model_name)
    if model_entry is None:
        st.error("Model not found!")
        return None
    checkpoint_path = os.path.join(model_name, str(model_entry[1]))
    config_path = os.path.join(model_name, "config.json")
    if not (os.path.isfile(checkpoint_path) and os.path.isfile(config_path)):
        st.error("Model not found!")
        return None

    synthesizer = Synthesizer(checkpoint_path, config_path)
    wavs = synthesizer.tts(text)

    # Reserve a unique file name and close the handle before writing:
    # save_wav() expects a path, and an open handle blocks writes on Windows.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name
    synthesizer.save_wav(wavs, wav_path)
    return wav_path


# Streamlit app
def main():
    """Render the UI: text box, model picker and a synthesize button."""
    st.title('persian tts playground')
    st.markdown(""" Persian TTS Demo) """)

    text_input = st.text_area(
        "Enter Text to Synthesize:",
        "زین همرهان سست عناصر، دلم گرفت.",
    )
    # Default to the first model; a fixed index of 1 is out of range while
    # MODEL_INFO holds a single entry.
    model_name = st.selectbox(
        "Pick a TTS Model",
        [info[0] for info in MODEL_INFO],
        index=0,
    )

    if st.button('Synthesize'):
        audio_file = synthesize_speech(text_input, model_name)
        if audio_file:
            # Hand the bytes to Streamlit and remove the temp file so that
            # repeated syntheses do not pile up files on disk.
            try:
                with open(audio_file, "rb") as wav_file:
                    audio_bytes = wav_file.read()
            finally:
                os.remove(audio_file)
            st.audio(audio_bytes, format='audio/wav')


# Download models and run the Streamlit app
if __name__ == "__main__":
    download_models()
    main()