"""Gradio demo serving the DigitalUmuganda Lingala VITS text-to-speech model.

On startup the script fetches the model repository with git (git-lfs is
required for the checkpoint), loads the model once, and launches a Gradio
interface that turns input text into a WAV file.
"""

import subprocess
import tempfile
from pathlib import Path

import gradio as gr
import soundfile as sf
from espnet2.bin.tts_inference import Text2Speech

MODEL_REPO_URL = "https://huggingface.co/DigitalUmuganda/lingala_vits_tts"
MODEL_DIR = Path("lingala_vits_tts")


def _fetch_model() -> None:
    """Clone the model repository into ``MODEL_DIR`` unless it already exists.

    Raises:
        subprocess.CalledProcessError: If a git command exits non-zero.
    """
    # Argument lists (no shell=True): nothing is parsed by a shell.
    subprocess.run(["git", "lfs", "install"], check=True)
    # `git clone` fails when the target directory already exists and is not
    # empty, which would crash every restart after the first one.
    if not MODEL_DIR.is_dir():
        subprocess.run(
            ["git", "clone", MODEL_REPO_URL, str(MODEL_DIR)],
            check=True,
        )


_fetch_model()

# Build the synthesizer once at startup instead of once per request, so the
# checkpoint is not re-read from disk on every call.
TEXT2SPEECH = Text2Speech(
    train_config=str(MODEL_DIR / "config.yaml"),
    model_file=str(MODEL_DIR / "train.total_count.best.pth"),
)


def generate_audio(text: str) -> str:
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Text to be spoken by the model.

    Returns:
        Path to a 16-bit PCM WAV file containing the synthesized audio.
    """
    wav = TEXT2SPEECH(text)["wav"]
    # Reserve a unique file name per request so overlapping requests cannot
    # overwrite each other's output. The file is intentionally kept
    # (delete=False) because the caller reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        out_path = handle.name
    sf.write(out_path, wav.numpy(), TEXT2SPEECH.fs, "PCM_16")
    return out_path


iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.inputs.Textbox(
            label="Entrez le text",
        ),
    ],
    outputs=gr.outputs.Audio(type="filepath", label="Output"),
    title="Lingala TTS",
    description="Logiciel capable de creer de son a partir de texte en Lingala",
    layout="vertical",
    # NOTE(review): flagging is disabled here while flagging_options are still
    # supplied; kept as in the original configuration — confirm which is
    # intended.
    allow_flagging=False,
    flagging_options=['erreur', 'mauvaise-qualite', 'mauvaise-prononciation'],
)

iface.launch(share=False)