yakima5329 committed on
Commit
de0feb8
1 Parent(s): 4f420c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -6
app.py CHANGED
@@ -5,6 +5,9 @@ import re
5
  import time
6
  import uuid
7
  from io import StringIO
 
 
 
8
 
9
  import gradio as gr
10
  import spaces
@@ -169,7 +172,21 @@ def predict(
169
  keep_len = calculate_keep_len(prompt, language)
170
  out["wav"] = out["wav"][:keep_len]
171
 
172
- torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  except RuntimeError as e:
175
  if "device-side assert" in str(e):
@@ -235,7 +252,7 @@ def predict(
235
  "Something unexpected happened please retry again."
236
  )
237
  return (None, metrics_text)
238
- return ("output.wav", metrics_text)
239
 
240
 
241
  with gr.Blocks(analytics_enabled=False) as demo:
@@ -244,8 +261,8 @@ with gr.Blocks(analytics_enabled=False) as demo:
244
  gr.Markdown(
245
  """
246
  # viXTTS Demo ✨
247
- - Github: https://github.com/thinhlpg/vixtts-demo/
248
- - viVoice: https://github.com/thinhlpg/viVoice
249
  """
250
  )
251
  with gr.Column():
@@ -304,6 +321,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
304
  with gr.Column():
305
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
306
  out_text_gr = gr.Text(label="Metrics")
 
307
 
308
  tts_button.click(
309
  predict,
@@ -313,9 +331,9 @@ with gr.Blocks(analytics_enabled=False) as demo:
313
  ref_gr,
314
  normalize_text,
315
  ],
316
- outputs=[audio_gr, out_text_gr],
317
  api_name="predict",
318
  )
319
 
320
  demo.queue()
321
- demo.launch(debug=True, show_api=True, share=True)
 
5
  import time
6
  import uuid
7
  from io import StringIO
8
+ from io import BytesIO
9
+ import base64
10
+ import requests
11
 
12
  import gradio as gr
13
  import spaces
 
172
  keep_len = calculate_keep_len(prompt, language)
173
  out["wav"] = out["wav"][:keep_len]
174
 
175
+ # print(out)
176
+ # print(out["wav"])
177
+
178
+ buffer = BytesIO()
179
+
180
+ # torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
181
+ torchaudio.save(buffer, torch.tensor(out["wav"]).unsqueeze(0), 24000, format='wav')
182
+
183
+ output_path = "output.wav"
184
+ with open(output_path, "wb") as f:
185
+ f.write(buffer.getbuffer())
186
+
187
+ upload_url = "https://temp.sh/upload"
188
+ res = requests.post(upload_url, files={"file": open(output_path, "rb")})
189
+ response = str(res.content, 'utf-8')
190
 
191
  except RuntimeError as e:
192
  if "device-side assert" in str(e):
 
252
  "Something unexpected happened please retry again."
253
  )
254
  return (None, metrics_text)
255
+ return ("output.wav", metrics_text, response)
256
 
257
 
258
  with gr.Blocks(analytics_enabled=False) as demo:
 
261
  gr.Markdown(
262
  """
263
  # viXTTS Demo ✨
264
+ - Github: https://github.com/thinhlpg/vixtts-demo/
265
+ - viVoice: https://github.com/thinhlpg/viVoice
266
  """
267
  )
268
  with gr.Column():
 
321
  with gr.Column():
322
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
323
  out_text_gr = gr.Text(label="Metrics")
324
+ response = gr.Text(label="temp")
325
 
326
  tts_button.click(
327
  predict,
 
331
  ref_gr,
332
  normalize_text,
333
  ],
334
+ outputs=[audio_gr, out_text_gr, response],
335
  api_name="predict",
336
  )
337
 
338
  demo.queue()
339
+ demo.launch(debug=True, show_api=True, share=True)