laubonghaudoi
committed on
Commit
•
41fae1a
1
Parent(s):
ca2aafc
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ If running this app in WSL2, you need to run the following command in the WSL2 t
|
|
3 |
|
4 |
ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1
|
5 |
"""
|
|
|
6 |
import asyncio
|
7 |
import logging
|
8 |
import os
|
@@ -98,27 +99,36 @@ version = tts_config.version
|
|
98 |
|
99 |
|
100 |
@spaces.GPU
|
101 |
-
def inference(
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
seed = -1 if keep_random else seed
|
115 |
-
actual_seed = seed if seed not in [-1,
|
116 |
-
"", None] else random.randrange(1 << 32)
|
117 |
inputs = {
|
118 |
"text": text,
|
119 |
"text_lang": dict_language[text_lang],
|
120 |
"ref_audio_path": ref_audio_path,
|
121 |
-
"aux_ref_audio_paths": [item.name for item in aux_ref_audio_paths]
|
|
|
|
|
122 |
"prompt_text": prompt_text if not ref_text_free else "",
|
123 |
"prompt_lang": dict_language[prompt_lang],
|
124 |
"top_k": top_k,
|
@@ -140,22 +150,28 @@ def inference(text, text_lang,
|
|
140 |
|
141 |
def custom_sort_key(s):
|
142 |
# 使用正则表达式提取字符串中的数字部分和非数字部分
|
143 |
-
parts = re.split(
|
144 |
# 将数字部分转换为整数,非数字部分保持不变
|
145 |
parts = [int(part) if part.isdigit() else part for part in parts]
|
146 |
return parts
|
147 |
|
148 |
|
149 |
def change_choices():
|
150 |
-
SoVITS_names, GPT_names = get_weights_names(
|
151 |
-
|
152 |
-
|
|
|
|
|
153 |
|
154 |
|
155 |
pretrained_sovits_name = [
|
156 |
-
"GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
159 |
_ = [[], []]
|
160 |
for i in range(2):
|
161 |
if os.path.exists(pretrained_gpt_name[i]):
|
@@ -166,7 +182,7 @@ pretrained_gpt_name, pretrained_sovits_name = _
|
|
166 |
|
167 |
SoVITS_weight_root = ["SoVITS_weights_v2", "SoVITS_weights"]
|
168 |
GPT_weight_root = ["GPT_weights_v2", "GPT_weights"]
|
169 |
-
for path in SoVITS_weight_root+GPT_weight_root:
|
170 |
os.makedirs(path, exist_ok=True)
|
171 |
|
172 |
|
@@ -184,8 +200,7 @@ def get_weights_names(GPT_weight_root, SoVITS_weight_root):
|
|
184 |
return SoVITS_names, GPT_names
|
185 |
|
186 |
|
187 |
-
SoVITS_names, GPT_names = get_weights_names(
|
188 |
-
GPT_weight_root, SoVITS_weight_root)
|
189 |
|
190 |
|
191 |
def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
|
@@ -194,19 +209,29 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
|
|
194 |
dict_language = dict_language_v2
|
195 |
if prompt_language is not None and text_language is not None:
|
196 |
if prompt_language in list(dict_language.keys()):
|
197 |
-
prompt_text_update, prompt_language_update =
|
198 |
-
|
|
|
|
|
199 |
else:
|
200 |
-
prompt_text_update = {
|
201 |
-
prompt_language_update = {
|
202 |
-
'__type__': 'update', 'value': i18n("中文")}
|
203 |
if text_language in list(dict_language.keys()):
|
204 |
-
text_update, text_language_update =
|
205 |
-
|
|
|
|
|
206 |
else:
|
207 |
-
text_update = {
|
208 |
-
text_language_update = {
|
209 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
|
212 |
async def create_app():
|
@@ -221,104 +246,215 @@ async def create_app():
|
|
221 |
# with gr.Group():
|
222 |
gr.Markdown(value=i18n("模型切换"))
|
223 |
with gr.Row():
|
224 |
-
GPT_dropdown = gr.Dropdown(
|
225 |
-
|
226 |
-
|
227 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
|
229 |
-
refresh_button.click(
|
230 |
-
|
|
|
|
|
|
|
231 |
|
232 |
with gr.Row():
|
233 |
with gr.Column():
|
234 |
gr.Markdown(value=i18n("*请上传并填写参考信息"))
|
235 |
with gr.Row():
|
236 |
-
inp_ref = gr.Audio(
|
237 |
-
|
238 |
-
|
|
|
|
|
|
|
|
|
239 |
prompt_text = gr.Textbox(
|
240 |
-
label=i18n("主参考音频的文本"), value="", lines=2
|
|
|
241 |
with gr.Row():
|
242 |
prompt_language = gr.Dropdown(
|
243 |
-
label=i18n("主参考音频的语种"),
|
|
|
|
|
244 |
)
|
245 |
with gr.Column():
|
246 |
-
ref_text_free = gr.Checkbox(
|
247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
gr.Markdown(
|
249 |
-
i18n(
|
|
|
|
|
|
|
250 |
|
251 |
with gr.Column():
|
252 |
gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
|
253 |
-
text = gr.Textbox(
|
254 |
-
|
|
|
255 |
text_language = gr.Dropdown(
|
256 |
-
label=i18n("需要合成的文本的语种"),
|
|
|
|
|
257 |
)
|
258 |
|
259 |
with gr.Group():
|
260 |
gr.Markdown(value=i18n("推理设置"))
|
261 |
with gr.Row():
|
262 |
-
|
263 |
with gr.Column():
|
264 |
-
batch_size = gr.Slider(
|
265 |
-
|
266 |
-
|
267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
268 |
speed_factor = gr.Slider(
|
269 |
-
minimum=0.6,
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
with gr.Column():
|
279 |
with gr.Row():
|
280 |
how_to_cut = gr.Dropdown(
|
281 |
label=i18n("怎么切"),
|
282 |
-
choices=[
|
283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
value=i18n("凑四句一切"),
|
285 |
-
interactive=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
)
|
287 |
-
parallel_infer = gr.Checkbox(label=i18n(
|
288 |
-
"并行推理"), value=True, interactive=True, show_label=True)
|
289 |
-
split_bucket = gr.Checkbox(label=i18n(
|
290 |
-
"数据分桶(并行推理时会降低一点计算量)"), value=True, interactive=True, show_label=True)
|
291 |
|
292 |
with gr.Row():
|
293 |
seed = gr.Number(label=i18n("随机种子"), value=-1)
|
294 |
-
keep_random = gr.Checkbox(
|
295 |
-
"保持随机"),
|
|
|
|
|
|
|
|
|
296 |
|
297 |
output = gr.Audio(label=i18n("输出的语音"))
|
298 |
with gr.Row():
|
299 |
inference_button = gr.Button(
|
300 |
-
i18n("合成语音"), variant="primary"
|
|
|
301 |
stop_infer = gr.Button(i18n("终止合成"), variant="primary")
|
302 |
|
303 |
inference_button.click(
|
304 |
inference,
|
305 |
[
|
306 |
-
text,
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
314 |
],
|
315 |
[output, seed],
|
316 |
)
|
317 |
stop_infer.click(tts_pipeline.stop, [], [])
|
318 |
-
SoVITS_dropdown.change(
|
319 |
-
|
320 |
-
|
321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
|
323 |
# with gr.Group():
|
324 |
# gr.Markdown(value=i18n(
|
@@ -348,7 +484,7 @@ async def create_app():
|
|
348 |
return app
|
349 |
|
350 |
|
351 |
-
if __name__ ==
|
352 |
app = asyncio.run(create_app())
|
353 |
app.launch(
|
354 |
# server_name="0.0.0.0",
|
|
|
3 |
|
4 |
ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1
|
5 |
"""
|
6 |
+
|
7 |
import asyncio
|
8 |
import logging
|
9 |
import os
|
|
|
99 |
|
100 |
|
101 |
@spaces.GPU
|
102 |
+
def inference(
|
103 |
+
text,
|
104 |
+
text_lang,
|
105 |
+
ref_audio_path,
|
106 |
+
aux_ref_audio_paths,
|
107 |
+
prompt_text,
|
108 |
+
prompt_lang,
|
109 |
+
top_k,
|
110 |
+
top_p,
|
111 |
+
temperature,
|
112 |
+
text_split_method,
|
113 |
+
batch_size,
|
114 |
+
speed_factor,
|
115 |
+
ref_text_free,
|
116 |
+
split_bucket,
|
117 |
+
fragment_interval,
|
118 |
+
seed,
|
119 |
+
keep_random,
|
120 |
+
parallel_infer,
|
121 |
+
repetition_penalty,
|
122 |
+
):
|
123 |
seed = -1 if keep_random else seed
|
124 |
+
actual_seed = seed if seed not in [-1, "", None] else random.randrange(1 << 32)
|
|
|
125 |
inputs = {
|
126 |
"text": text,
|
127 |
"text_lang": dict_language[text_lang],
|
128 |
"ref_audio_path": ref_audio_path,
|
129 |
+
"aux_ref_audio_paths": [item.name for item in aux_ref_audio_paths]
|
130 |
+
if aux_ref_audio_paths is not None
|
131 |
+
else [],
|
132 |
"prompt_text": prompt_text if not ref_text_free else "",
|
133 |
"prompt_lang": dict_language[prompt_lang],
|
134 |
"top_k": top_k,
|
|
|
150 |
|
151 |
def custom_sort_key(s):
|
152 |
# 使用正则表达式提取字符串中的数字部分和非数字部分
|
153 |
+
parts = re.split("(\d+)", s)
|
154 |
# 将数字部分转换为整数,非数字部分保持不变
|
155 |
parts = [int(part) if part.isdigit() else part for part in parts]
|
156 |
return parts
|
157 |
|
158 |
|
159 |
def change_choices():
|
160 |
+
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
161 |
+
return {
|
162 |
+
"choices": sorted(SoVITS_names, key=custom_sort_key),
|
163 |
+
"__type__": "update",
|
164 |
+
}, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"}
|
165 |
|
166 |
|
167 |
pretrained_sovits_name = [
|
168 |
+
"GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
|
169 |
+
"GPT_SoVITS/pretrained_models/s2G488k.pth",
|
170 |
+
]
|
171 |
+
pretrained_gpt_name = [
|
172 |
+
"GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
|
173 |
+
"GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt",
|
174 |
+
]
|
175 |
_ = [[], []]
|
176 |
for i in range(2):
|
177 |
if os.path.exists(pretrained_gpt_name[i]):
|
|
|
182 |
|
183 |
SoVITS_weight_root = ["SoVITS_weights_v2", "SoVITS_weights"]
|
184 |
GPT_weight_root = ["GPT_weights_v2", "GPT_weights"]
|
185 |
+
for path in SoVITS_weight_root + GPT_weight_root:
|
186 |
os.makedirs(path, exist_ok=True)
|
187 |
|
188 |
|
|
|
200 |
return SoVITS_names, GPT_names
|
201 |
|
202 |
|
203 |
+
SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
|
|
|
204 |
|
205 |
|
206 |
def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
|
|
|
209 |
dict_language = dict_language_v2
|
210 |
if prompt_language is not None and text_language is not None:
|
211 |
if prompt_language in list(dict_language.keys()):
|
212 |
+
prompt_text_update, prompt_language_update = (
|
213 |
+
{"__type__": "update"},
|
214 |
+
{"__type__": "update", "value": prompt_language},
|
215 |
+
)
|
216 |
else:
|
217 |
+
prompt_text_update = {"__type__": "update", "value": ""}
|
218 |
+
prompt_language_update = {"__type__": "update", "value": i18n("中文")}
|
|
|
219 |
if text_language in list(dict_language.keys()):
|
220 |
+
text_update, text_language_update = (
|
221 |
+
{"__type__": "update"},
|
222 |
+
{"__type__": "update", "value": text_language},
|
223 |
+
)
|
224 |
else:
|
225 |
+
text_update = {"__type__": "update", "value": ""}
|
226 |
+
text_language_update = {"__type__": "update", "value": i18n("中文")}
|
227 |
+
return (
|
228 |
+
{"__type__": "update", "choices": list(dict_language.keys())},
|
229 |
+
{"__type__": "update", "choices": list(dict_language.keys())},
|
230 |
+
prompt_text_update,
|
231 |
+
prompt_language_update,
|
232 |
+
text_update,
|
233 |
+
text_language_update,
|
234 |
+
)
|
235 |
|
236 |
|
237 |
async def create_app():
|
|
|
246 |
# with gr.Group():
|
247 |
gr.Markdown(value=i18n("模型切换"))
|
248 |
with gr.Row():
|
249 |
+
GPT_dropdown = gr.Dropdown(
|
250 |
+
label=i18n("GPT模型列表"),
|
251 |
+
choices=sorted(GPT_names, key=custom_sort_key),
|
252 |
+
value=gpt_path,
|
253 |
+
interactive=True,
|
254 |
+
)
|
255 |
+
SoVITS_dropdown = gr.Dropdown(
|
256 |
+
label=i18n("SoVITS模型列表"),
|
257 |
+
choices=sorted(SoVITS_names, key=custom_sort_key),
|
258 |
+
value=sovits_path,
|
259 |
+
interactive=True,
|
260 |
+
)
|
261 |
refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
|
262 |
+
refresh_button.click(
|
263 |
+
fn=change_choices,
|
264 |
+
inputs=[],
|
265 |
+
outputs=[SoVITS_dropdown, GPT_dropdown],
|
266 |
+
)
|
267 |
|
268 |
with gr.Row():
|
269 |
with gr.Column():
|
270 |
gr.Markdown(value=i18n("*请上传并填写参考信息"))
|
271 |
with gr.Row():
|
272 |
+
inp_ref = gr.Audio(
|
273 |
+
label="上傳 3-10 秒長嘅參考音頻", type="filepath"
|
274 |
+
)
|
275 |
+
inp_refs = gr.File(
|
276 |
+
label=i18n("辅参考音频(可选多个,或不选)"),
|
277 |
+
file_count="multiple",
|
278 |
+
)
|
279 |
prompt_text = gr.Textbox(
|
280 |
+
label=i18n("主参考音频的文本"), value="", lines=2
|
281 |
+
)
|
282 |
with gr.Row():
|
283 |
prompt_language = gr.Dropdown(
|
284 |
+
label=i18n("主参考音频的语种"),
|
285 |
+
choices=list(dict_language.keys()),
|
286 |
+
value=i18n("中文"),
|
287 |
)
|
288 |
with gr.Column():
|
289 |
+
ref_text_free = gr.Checkbox(
|
290 |
+
label=i18n(
|
291 |
+
"开启无参考文本模式。不填参考文本亦相当于开启。"
|
292 |
+
),
|
293 |
+
value=False,
|
294 |
+
interactive=True,
|
295 |
+
show_label=True,
|
296 |
+
)
|
297 |
gr.Markdown(
|
298 |
+
i18n(
|
299 |
+
"使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。"
|
300 |
+
)
|
301 |
+
)
|
302 |
|
303 |
with gr.Column():
|
304 |
gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
|
305 |
+
text = gr.Textbox(
|
306 |
+
label=i18n("需要合成的文本"), value="", lines=20, max_lines=20
|
307 |
+
)
|
308 |
text_language = gr.Dropdown(
|
309 |
+
label=i18n("需要合成的文本的语种"),
|
310 |
+
choices=list(dict_language.keys()),
|
311 |
+
value=i18n("中文"),
|
312 |
)
|
313 |
|
314 |
with gr.Group():
|
315 |
gr.Markdown(value=i18n("推理设置"))
|
316 |
with gr.Row():
|
|
|
317 |
with gr.Column():
|
318 |
+
batch_size = gr.Slider(
|
319 |
+
minimum=1,
|
320 |
+
maximum=200,
|
321 |
+
step=1,
|
322 |
+
label=i18n("batch_size"),
|
323 |
+
value=20,
|
324 |
+
interactive=True,
|
325 |
+
)
|
326 |
+
fragment_interval = gr.Slider(
|
327 |
+
minimum=0.01,
|
328 |
+
maximum=1,
|
329 |
+
step=0.01,
|
330 |
+
label=i18n("分段间隔(秒)"),
|
331 |
+
value=0.3,
|
332 |
+
interactive=True,
|
333 |
+
)
|
334 |
speed_factor = gr.Slider(
|
335 |
+
minimum=0.6,
|
336 |
+
maximum=1.65,
|
337 |
+
step=0.05,
|
338 |
+
label="speed_factor",
|
339 |
+
value=1.0,
|
340 |
+
interactive=True,
|
341 |
+
)
|
342 |
+
top_k = gr.Slider(
|
343 |
+
minimum=1,
|
344 |
+
maximum=100,
|
345 |
+
step=1,
|
346 |
+
label=i18n("top_k"),
|
347 |
+
value=5,
|
348 |
+
interactive=True,
|
349 |
+
)
|
350 |
+
top_p = gr.Slider(
|
351 |
+
minimum=0,
|
352 |
+
maximum=1,
|
353 |
+
step=0.05,
|
354 |
+
label=i18n("top_p"),
|
355 |
+
value=1,
|
356 |
+
interactive=True,
|
357 |
+
)
|
358 |
+
temperature = gr.Slider(
|
359 |
+
minimum=0,
|
360 |
+
maximum=1,
|
361 |
+
step=0.05,
|
362 |
+
label=i18n("temperature"),
|
363 |
+
value=1,
|
364 |
+
interactive=True,
|
365 |
+
)
|
366 |
+
repetition_penalty = gr.Slider(
|
367 |
+
minimum=0,
|
368 |
+
maximum=2,
|
369 |
+
step=0.05,
|
370 |
+
label=i18n("重复惩罚"),
|
371 |
+
value=1.35,
|
372 |
+
interactive=True,
|
373 |
+
)
|
374 |
with gr.Column():
|
375 |
with gr.Row():
|
376 |
how_to_cut = gr.Dropdown(
|
377 |
label=i18n("怎么切"),
|
378 |
+
choices=[
|
379 |
+
i18n("不切"),
|
380 |
+
i18n("凑四句一切"),
|
381 |
+
i18n("凑50字一切"),
|
382 |
+
i18n("按中文句号。切"),
|
383 |
+
i18n("按英文句号.切"),
|
384 |
+
i18n("按标点符号切"),
|
385 |
+
],
|
386 |
value=i18n("凑四句一切"),
|
387 |
+
interactive=True,
|
388 |
+
scale=1,
|
389 |
+
)
|
390 |
+
parallel_infer = gr.Checkbox(
|
391 |
+
label=i18n("并行推理"),
|
392 |
+
value=True,
|
393 |
+
interactive=True,
|
394 |
+
show_label=True,
|
395 |
+
)
|
396 |
+
split_bucket = gr.Checkbox(
|
397 |
+
label=i18n("数据分桶(并行推理时会降低一点计算量)"),
|
398 |
+
value=True,
|
399 |
+
interactive=True,
|
400 |
+
show_label=True,
|
401 |
)
|
|
|
|
|
|
|
|
|
402 |
|
403 |
with gr.Row():
|
404 |
seed = gr.Number(label=i18n("随机种子"), value=-1)
|
405 |
+
keep_random = gr.Checkbox(
|
406 |
+
label=i18n("保持随机"),
|
407 |
+
value=True,
|
408 |
+
interactive=True,
|
409 |
+
show_label=True,
|
410 |
+
)
|
411 |
|
412 |
output = gr.Audio(label=i18n("输出的语音"))
|
413 |
with gr.Row():
|
414 |
inference_button = gr.Button(
|
415 |
+
i18n("合成语音"), variant="primary"
|
416 |
+
)
|
417 |
stop_infer = gr.Button(i18n("终止合成"), variant="primary")
|
418 |
|
419 |
inference_button.click(
|
420 |
inference,
|
421 |
[
|
422 |
+
text,
|
423 |
+
text_language,
|
424 |
+
inp_ref,
|
425 |
+
inp_refs,
|
426 |
+
prompt_text,
|
427 |
+
prompt_language,
|
428 |
+
top_k,
|
429 |
+
top_p,
|
430 |
+
temperature,
|
431 |
+
how_to_cut,
|
432 |
+
batch_size,
|
433 |
+
speed_factor,
|
434 |
+
ref_text_free,
|
435 |
+
split_bucket,
|
436 |
+
fragment_interval,
|
437 |
+
seed,
|
438 |
+
keep_random,
|
439 |
+
parallel_infer,
|
440 |
+
repetition_penalty,
|
441 |
],
|
442 |
[output, seed],
|
443 |
)
|
444 |
stop_infer.click(tts_pipeline.stop, [], [])
|
445 |
+
SoVITS_dropdown.change(
|
446 |
+
change_sovits_weights,
|
447 |
+
[SoVITS_dropdown, prompt_language, text_language],
|
448 |
+
[
|
449 |
+
prompt_language,
|
450 |
+
text_language,
|
451 |
+
prompt_text,
|
452 |
+
prompt_language,
|
453 |
+
text,
|
454 |
+
text_language,
|
455 |
+
],
|
456 |
+
)
|
457 |
+
GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
|
458 |
|
459 |
# with gr.Group():
|
460 |
# gr.Markdown(value=i18n(
|
|
|
484 |
return app
|
485 |
|
486 |
|
487 |
+
if __name__ == "__main__":
|
488 |
app = asyncio.run(create_app())
|
489 |
app.launch(
|
490 |
# server_name="0.0.0.0",
|