Digital Xingtong committed on
Commit
9116564
1 Parent(s): 12de674

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -4
app.py CHANGED
@@ -50,7 +50,7 @@ def get_text(text, language_str, hps):
50
  language = torch.LongTensor(language)
51
 
52
  return bert, phone, tone, language
53
-
54
  def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
55
  global net_g
56
  bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
@@ -65,12 +65,24 @@ def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
65
  audio = net_g.infer(x_tst, x_tst_lengths, speakers, tones, lang_ids, bert, sdp_ratio=sdp_ratio
66
  , noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.cpu().float().numpy()
67
  del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
 
68
  return audio
69
-
 
 
 
 
 
 
 
 
 
70
  def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
71
  with torch.no_grad():
72
  audio = infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, sid=speaker)
73
- return "Success", (hps.data.sampling_rate, audio)
 
 
74
 
75
 
76
  if __name__ == "__main__":
@@ -138,10 +150,11 @@ if __name__ == "__main__":
138
  with gr.Column():
139
  text_output = gr.Textbox(label="Message")
140
  audio_output = gr.Audio(label="Output Audio")
 
141
 
142
  btn.click(tts_fn,
143
  inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
144
- outputs=[text_output, audio_output])
145
 
146
  # webbrowser.open("http://127.0.0.1:6006")
147
  # app.launch(server_port=6006, show_error=True)
 
50
  language = torch.LongTensor(language)
51
 
52
  return bert, phone, tone, language
53
+ import soundfile as sf
54
  def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
55
  global net_g
56
  bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
 
65
  audio = net_g.infer(x_tst, x_tst_lengths, speakers, tones, lang_ids, bert, sdp_ratio=sdp_ratio
66
  , noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.cpu().float().numpy()
67
  del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
68
+ sf.write("tmp.wav", audio, 44100)
69
  return audio
70
+ def convert_wav_to_ogg(wav_file):
71
+ os.makedirs('out', exist_ok=True)
72
+ filename = os.path.splitext(os.path.basename(wav_file.name))[0]
73
+ output_path_ogg = os.path.join('out', f"out.ogg")
74
+
75
+ renamed_input_path = os.path.join('in', f"in.wav")
76
+ os.makedirs('in', exist_ok=True)
77
+ os.rename(wav_file.name, renamed_input_path)
78
+ command = ["ffmpeg", "-i", renamed_input_path, "-acodec", "libopus", "-y", output_path_ogg]
79
+ os.system(" ".join(command))
80
  def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
81
  with torch.no_grad():
82
  audio = infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, sid=speaker)
83
+ with open('tmp.wav', 'rb') as wav_file:
84
+ newogg = convert_wav_to_ogg(wav_file)
85
+ return "Success", (hps.data.sampling_rate, audio),newogg
86
 
87
 
88
  if __name__ == "__main__":
 
150
  with gr.Column():
151
  text_output = gr.Textbox(label="Message")
152
  audio_output = gr.Audio(label="Output Audio")
153
+ ogg_output = gr.File(label="Converted OGG file")
154
 
155
  btn.click(tts_fn,
156
  inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
157
+ outputs=[text_output, audio_output,ogg_output])
158
 
159
  # webbrowser.open("http://127.0.0.1:6006")
160
  # app.launch(server_port=6006, show_error=True)