Spaces:

zzb1420
/

Bufeiyan-b-Bert-VITS2

Runtime error

App Files Files Community

Digital Xingtong committed on Sep 18, 2023

Commit

9116564

•

1 Parent(s): 12de674

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -4

app.py CHANGED Viewed

@@ -50,7 +50,7 @@ def get_text(text, language_str, hps):
     language = torch.LongTensor(language)
     return bert, phone, tone, language
 def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
     global net_g
     bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
@@ -65,12 +65,24 @@ def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
         audio = net_g.infer(x_tst, x_tst_lengths, speakers, tones, lang_ids, bert, sdp_ratio=sdp_ratio
                            , noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.cpu().float().numpy()
         del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
         return audio
 # Gradio click handler: synthesizes speech for `text` with the chosen speaker
 # and returns a status message plus a (sampling_rate, audio) pair.
 def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
     # Run synthesis with gradient tracking disabled.
     with torch.no_grad():
         audio = infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, sid=speaker)
-    return "Success", (hps.data.sampling_rate, audio)
 if __name__ == "__main__":
@@ -138,10 +150,11 @@ if __name__ == "__main__":
             with gr.Column():
                 text_output = gr.Textbox(label="Message")
                 audio_output = gr.Audio(label="Output Audio")
         btn.click(tts_fn,
                 inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
-                outputs=[text_output, audio_output])
 #    webbrowser.open("http://127.0.0.1:6006")
 #    app.launch(server_port=6006, show_error=True)

     language = torch.LongTensor(language)
     return bert, phone, tone, language
+import soundfile as sf
 def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
     global net_g
     bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
         audio = net_g.infer(x_tst, x_tst_lengths, speakers, tones, lang_ids, bert, sdp_ratio=sdp_ratio
                            , noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.cpu().float().numpy()
         del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
+        sf.write("tmp.wav", audio, 44100)
         return audio
+def convert_wav_to_ogg(wav_file):
+    os.makedirs('out', exist_ok=True)
+    filename = os.path.splitext(os.path.basename(wav_file.name))[0]
+    output_path_ogg = os.path.join('out', f"out.ogg")
+    renamed_input_path = os.path.join('in', f"in.wav")
+    os.makedirs('in', exist_ok=True)
+    os.rename(wav_file.name, renamed_input_path)
+    command = ["ffmpeg", "-i", renamed_input_path, "-acodec", "libopus", "-y", output_path_ogg]
+    os.system(" ".join(command))
 # Gradio click handler: synthesizes speech for `text` with the chosen speaker
 # and returns a status message, a (sampling_rate, audio) pair and the value
 # produced by convert_wav_to_ogg.
 def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
     # Run synthesis with gradient tracking disabled.
     with torch.no_grad():
         audio = infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, sid=speaker)
     # infer() has already written the waveform to tmp.wav; it is reopened here
     # so the converter can read the path from the file object's .name.
     # NOTE(review): infer() writes tmp.wav with a hard-coded 44100 Hz rate while
     # the tuple below reports hps.data.sampling_rate - confirm the two agree.
+    with open('tmp.wav', 'rb') as wav_file:
+        newogg = convert_wav_to_ogg(wav_file)
+    return "Success", (hps.data.sampling_rate, audio),newogg
 if __name__ == "__main__":
             with gr.Column():
                 text_output = gr.Textbox(label="Message")
                 audio_output = gr.Audio(label="Output Audio")
+                ogg_output = gr.File(label="Converted OGG file")
         btn.click(tts_fn,
                 inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
+                outputs=[text_output, audio_output,ogg_output])
 #    webbrowser.open("http://127.0.0.1:6006")
 #    app.launch(server_port=6006, show_error=True)