Spaces:

coqui
/

xtts

Running on T4

Vaibhav Srivastav commited on Sep 14, 2023

Commit

142fdc7

•

1 Parent(s): 33e0dc8

up

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,14 +3,18 @@ from TTS.api import TTS
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
-def predict(prompt, language, audio_file_pth):
-    tts.tts_to_file(text=prompt,
-                file_path="output.wav",
-                speaker_wav=audio_file_pth,
-                language=language)
-    return gr.make_waveform(audio="output.wav",)
 title = "XTTS: MVP"
@@ -18,12 +22,39 @@ title = "XTTS: MVP"
 gr.Interface(
     fn=predict,
     inputs=[
-        gr.Textbox(label="Prompt", info = "One or two sentences at a time is better* (max: 10)", placeholder = "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",),
-        gr.Dropdown(choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cz", "ar", "zh"], max_choices=1),
-        gr.Audio(label="Upload Speaker WAV", type="filepath"),
     ],
     outputs=[
         gr.Video(label="Synthesised Speech"),
     ],
     title=title,
-).launch(debug=True)

 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1", gpu=True)
+def predict(prompt, language, audio_file_pth):
+    tts.tts_to_file(
+        text=prompt,
+        file_path="output.wav",
+        speaker_wav=audio_file_pth,
+        language=language,
+    )
+    return gr.make_waveform(
+        audio="output.wav",
+    )
 title = "XTTS: MVP"
 gr.Interface(
     fn=predict,
     inputs=[
+        gr.Textbox(
+            label="Text Prompt",
+            info="One or two sentences at a time is better",
+            placeholder="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
+        ),
+        gr.Dropdown(
+            label="Language",
+            info="Select an output language for the synthesised speech",
+            choices=[
+                "en",
+                "es",
+                "fr",
+                "de",
+                "it",
+                "pt",
+                "pl",
+                "tr",
+                "ru",
+                "nl",
+                "cz",
+                "ar",
+                "zh",
+            ],
+            max_choices=1,
+        ),
+        gr.Audio(
+            label="Reference Audio",
+            info="Upload a reference audio for target speaker voice",
+            type="filepath",
+        ),
     ],
     outputs=[
         gr.Video(label="Synthesised Speech"),
     ],
     title=title,
+).launch(debug=True)