vixtts-demo

Sleeping

App Files Community

yakima5329 committed on Jul 17

Commit

de0feb8

•

1 Parent(s): 4f420c4

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -6

app.py CHANGED Viewed

@@ -5,6 +5,9 @@ import re
 import time
 import uuid
 from io import StringIO
 import gradio as gr
 import spaces
@@ -169,7 +172,21 @@ def predict(
         keep_len = calculate_keep_len(prompt, language)
         out["wav"] = out["wav"][:keep_len]
-        torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
     except RuntimeError as e:
         if "device-side assert" in str(e):
@@ -235,7 +252,7 @@ def predict(
                     "Something unexpected happened please retry again."
                 )
             return (None, metrics_text)
-    return ("output.wav", metrics_text)
 with gr.Blocks(analytics_enabled=False) as demo:
@@ -244,8 +261,8 @@ with gr.Blocks(analytics_enabled=False) as demo:
             gr.Markdown(
                 """
                 # viXTTS Demo ✨
-                - Github: https://github.com/thinhlpg/vixtts-demo/
-                - viVoice: https://github.com/thinhlpg/viVoice
                 """
             )
         with gr.Column():
@@ -304,6 +321,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
         with gr.Column():
             audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
             out_text_gr = gr.Text(label="Metrics")
     tts_button.click(
         predict,
@@ -313,9 +331,9 @@ with gr.Blocks(analytics_enabled=False) as demo:
             ref_gr,
             normalize_text,
         ],
-        outputs=[audio_gr, out_text_gr],
         api_name="predict",
     )
 demo.queue()
-demo.launch(debug=True, show_api=True, share=True)

 import time
 import uuid
 from io import StringIO
+from io import BytesIO
+import base64
+import requests
 import gradio as gr
 import spaces
         keep_len = calculate_keep_len(prompt, language)
         out["wav"] = out["wav"][:keep_len]
+        # print(out)
+        # print(out["wav"])
+        buffer = BytesIO()
+        # torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
+        torchaudio.save(buffer, torch.tensor(out["wav"]).unsqueeze(0), 24000, format='wav')
+        output_path = "output.wav"
+        with open(output_path, "wb") as f:
+            f.write(buffer.getbuffer())
+        upload_url = "https://temp.sh/upload"
+        res = requests.post(upload_url, files={"file": open(output_path, "rb")})
+        response = str(res.content, 'utf-8')
     except RuntimeError as e:
         if "device-side assert" in str(e):
                     "Something unexpected happened please retry again."
                 )
             return (None, metrics_text)
+    return ("output.wav", metrics_text, response)
 with gr.Blocks(analytics_enabled=False) as demo:
             gr.Markdown(
                 """
                 # viXTTS Demo ✨
+                - Github: GitHub - thinhlpg/vixtts-demo: A Vietnamese Voice Text-to-Speech Model ✨
+                - viVoice: GitHub - thinhlpg/viVoice: A 1000 Hours Cleaned Vietnamese Speech Dataset ✨
                 """
             )
         with gr.Column():
         with gr.Column():
             audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
             out_text_gr = gr.Text(label="Metrics")
+            response = gr.Text(label="temp")
     tts_button.click(
         predict,
             ref_gr,
             normalize_text,
         ],
+        outputs=[audio_gr, out_text_gr, response],
         api_name="predict",
     )
 demo.queue()
+demo.launch(debug=True, show_api=True, share=True)