Spaces:
Runtime error
Runtime error
Digital Xingtong
committed on
Commit
•
9116564
1
Parent(s):
12de674
Update app.py
Browse files
app.py
CHANGED
@@ -50,7 +50,7 @@ def get_text(text, language_str, hps):
|
|
50 |
language = torch.LongTensor(language)
|
51 |
|
52 |
return bert, phone, tone, language
|
53 |
-
|
54 |
def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
|
55 |
global net_g
|
56 |
bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
|
@@ -65,12 +65,24 @@ def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
|
|
65 |
audio = net_g.infer(x_tst, x_tst_lengths, speakers, tones, lang_ids, bert, sdp_ratio=sdp_ratio
|
66 |
, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.cpu().float().numpy()
|
67 |
del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
|
|
|
68 |
return audio
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
|
71 |
with torch.no_grad():
|
72 |
audio = infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale, sid=speaker)
|
73 |
-
|
|
|
|
|
74 |
|
75 |
|
76 |
if __name__ == "__main__":
|
@@ -138,10 +150,11 @@ if __name__ == "__main__":
|
|
138 |
with gr.Column():
|
139 |
text_output = gr.Textbox(label="Message")
|
140 |
audio_output = gr.Audio(label="Output Audio")
|
|
|
141 |
|
142 |
btn.click(tts_fn,
|
143 |
inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
|
144 |
-
outputs=[text_output, audio_output])
|
145 |
|
146 |
# webbrowser.open("http://127.0.0.1:6006")
|
147 |
# app.launch(server_port=6006, show_error=True)
|
|
|
50 |
language = torch.LongTensor(language)
|
51 |
|
52 |
return bert, phone, tone, language
|
53 |
+
import soundfile as sf
|
54 |
def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid):
|
55 |
global net_g
|
56 |
bert, phones, tones, lang_ids = get_text(text, "ZH", hps)
|
|
|
65 |
audio = net_g.infer(x_tst, x_tst_lengths, speakers, tones, lang_ids, bert, sdp_ratio=sdp_ratio
|
66 |
, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.cpu().float().numpy()
|
67 |
del x_tst, tones, lang_ids, bert, x_tst_lengths, speakers
|
68 |
+
sf.write("tmp.wav", audio, 44100)
|
69 |
return audio
|
70 |
+
def convert_wav_to_ogg(wav_file):
    """Transcode a WAV file to Ogg/Opus with ffmpeg and return the output path.

    The input file is first moved to ``in/in.wav`` and the result is written
    to ``out/out.ogg``. Both locations are fixed, so every call overwrites the
    files left by the previous one.

    Args:
        wav_file: Object whose ``name`` attribute is the path of the WAV file
            to convert (for example an open file handle).

    Returns:
        The path of the generated OGG file when ffmpeg exits successfully,
        otherwise ``None`` (ffmpeg missing or conversion failed).
    """
    # Local import keeps this function self-contained.
    import subprocess

    os.makedirs('out', exist_ok=True)
    os.makedirs('in', exist_ok=True)
    output_path_ogg = os.path.join('out', "out.ogg")
    renamed_input_path = os.path.join('in', "in.wav")

    # os.replace overwrites a leftover in.wav from an earlier call on every
    # platform, whereas os.rename raises on Windows if the target exists.
    os.replace(wav_file.name, renamed_input_path)

    command = ["ffmpeg", "-i", renamed_input_path, "-acodec", "libopus", "-y", output_path_ogg]
    try:
        # Pass the argument list directly (no shell) and detach stdin so
        # ffmpeg never waits for interactive input.
        completed = subprocess.run(command, stdin=subprocess.DEVNULL, check=False)
    except OSError:
        # ffmpeg is not installed or not executable; stay best-effort like
        # the original os.system call instead of raising.
        return None

    if completed.returncode != 0:
        return None
    return output_path_ogg
|
80 |
def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
    """Synthesize speech for ``text`` and hand the saved WAV to the OGG converter.

    Returns:
        A 3-tuple of the status string ``"Success"``, the pair
        ``(hps.data.sampling_rate, audio)``, and whatever
        ``convert_wav_to_ogg`` returns for ``tmp.wav``.
    """
    synthesis_options = dict(
        sdp_ratio=sdp_ratio,
        noise_scale=noise_scale,
        noise_scale_w=noise_scale_w,
        length_scale=length_scale,
        sid=speaker,
    )
    # Inference only: gradients are not needed.
    with torch.no_grad():
        waveform = infer(text, **synthesis_options)

    # NOTE(review): tmp.wav is expected to have been written by infer() - confirm.
    with open('tmp.wav', 'rb') as wav_handle:
        ogg_result = convert_wav_to_ogg(wav_handle)

    playback = (hps.data.sampling_rate, waveform)
    return "Success", playback, ogg_result
|
86 |
|
87 |
|
88 |
if __name__ == "__main__":
|
|
|
150 |
with gr.Column():
|
151 |
text_output = gr.Textbox(label="Message")
|
152 |
audio_output = gr.Audio(label="Output Audio")
|
153 |
+
ogg_output = gr.File(label="Converted OGG file")
|
154 |
|
155 |
btn.click(tts_fn,
|
156 |
inputs=[text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale],
|
157 |
+
outputs=[text_output, audio_output,ogg_output])
|
158 |
|
159 |
# webbrowser.open("http://127.0.0.1:6006")
|
160 |
# app.launch(server_port=6006, show_error=True)
|