laubonghaudoi committed on
Commit
41fae1a
1 Parent(s): ca2aafc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -90
app.py CHANGED
@@ -3,6 +3,7 @@ If running this app in WSL2, you need to run the following command in the WSL2 t
3
 
4
  ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1
5
  """
 
6
  import asyncio
7
  import logging
8
  import os
@@ -98,27 +99,36 @@ version = tts_config.version
98
 
99
 
100
  @spaces.GPU
101
- def inference(text, text_lang,
102
- ref_audio_path,
103
- aux_ref_audio_paths,
104
- prompt_text,
105
- prompt_lang, top_k,
106
- top_p, temperature,
107
- text_split_method, batch_size,
108
- speed_factor, ref_text_free,
109
- split_bucket, fragment_interval,
110
- seed, keep_random, parallel_infer,
111
- repetition_penalty
112
- ):
113
-
 
 
 
 
 
 
 
 
114
  seed = -1 if keep_random else seed
115
- actual_seed = seed if seed not in [-1,
116
- "", None] else random.randrange(1 << 32)
117
  inputs = {
118
  "text": text,
119
  "text_lang": dict_language[text_lang],
120
  "ref_audio_path": ref_audio_path,
121
- "aux_ref_audio_paths": [item.name for item in aux_ref_audio_paths] if aux_ref_audio_paths is not None else [],
 
 
122
  "prompt_text": prompt_text if not ref_text_free else "",
123
  "prompt_lang": dict_language[prompt_lang],
124
  "top_k": top_k,
@@ -140,22 +150,28 @@ def inference(text, text_lang,
140
 
141
  def custom_sort_key(s):
142
  # 使用正则表达式提取字符串中的数字部分和非数字部分
143
- parts = re.split('(\d+)', s)
144
  # 将数字部分转换为整数,非数字部分保持不变
145
  parts = [int(part) if part.isdigit() else part for part in parts]
146
  return parts
147
 
148
 
149
  def change_choices():
150
- SoVITS_names, GPT_names = get_weights_names(
151
- GPT_weight_root, SoVITS_weight_root)
152
- return {"choices": sorted(SoVITS_names, key=custom_sort_key), "__type__": "update"}, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"}
 
 
153
 
154
 
155
  pretrained_sovits_name = [
156
- "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth", "GPT_SoVITS/pretrained_models/s2G488k.pth"]
157
- pretrained_gpt_name = ["GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
158
- "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt"]
 
 
 
 
159
  _ = [[], []]
160
  for i in range(2):
161
  if os.path.exists(pretrained_gpt_name[i]):
@@ -166,7 +182,7 @@ pretrained_gpt_name, pretrained_sovits_name = _
166
 
167
  SoVITS_weight_root = ["SoVITS_weights_v2", "SoVITS_weights"]
168
  GPT_weight_root = ["GPT_weights_v2", "GPT_weights"]
169
- for path in SoVITS_weight_root+GPT_weight_root:
170
  os.makedirs(path, exist_ok=True)
171
 
172
 
@@ -184,8 +200,7 @@ def get_weights_names(GPT_weight_root, SoVITS_weight_root):
184
  return SoVITS_names, GPT_names
185
 
186
 
187
- SoVITS_names, GPT_names = get_weights_names(
188
- GPT_weight_root, SoVITS_weight_root)
189
 
190
 
191
  def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
@@ -194,19 +209,29 @@ def change_sovits_weights(sovits_path, prompt_language=None, text_language=None)
194
  dict_language = dict_language_v2
195
  if prompt_language is not None and text_language is not None:
196
  if prompt_language in list(dict_language.keys()):
197
- prompt_text_update, prompt_language_update = {'__type__': 'update'}, {
198
- '__type__': 'update', 'value': prompt_language}
 
 
199
  else:
200
- prompt_text_update = {'__type__': 'update', 'value': ''}
201
- prompt_language_update = {
202
- '__type__': 'update', 'value': i18n("中文")}
203
  if text_language in list(dict_language.keys()):
204
- text_update, text_language_update = {'__type__': 'update'}, {
205
- '__type__': 'update', 'value': text_language}
 
 
206
  else:
207
- text_update = {'__type__': 'update', 'value': ''}
208
- text_language_update = {'__type__': 'update', 'value': i18n("中文")}
209
- return {'__type__': 'update', 'choices': list(dict_language.keys())}, {'__type__': 'update', 'choices': list(dict_language.keys())}, prompt_text_update, prompt_language_update, text_update, text_language_update
 
 
 
 
 
 
 
210
 
211
 
212
  async def create_app():
@@ -221,104 +246,215 @@ async def create_app():
221
  # with gr.Group():
222
  gr.Markdown(value=i18n("模型切换"))
223
  with gr.Row():
224
- GPT_dropdown = gr.Dropdown(label=i18n("GPT模型列表"), choices=sorted(
225
- GPT_names, key=custom_sort_key), value=gpt_path, interactive=True)
226
- SoVITS_dropdown = gr.Dropdown(label=i18n("SoVITS模型列表"), choices=sorted(
227
- SoVITS_names, key=custom_sort_key), value=sovits_path, interactive=True)
 
 
 
 
 
 
 
 
228
  refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
229
- refresh_button.click(fn=change_choices, inputs=[], outputs=[
230
- SoVITS_dropdown, GPT_dropdown])
 
 
 
231
 
232
  with gr.Row():
233
  with gr.Column():
234
  gr.Markdown(value=i18n("*请上传并填写参考信息"))
235
  with gr.Row():
236
- inp_ref = gr.Audio("./ref.wav", type="filepath")
237
- inp_refs = gr.File(label=i18n(
238
- "辅参考音频(可选多个,或不选)"), file_count="multiple")
 
 
 
 
239
  prompt_text = gr.Textbox(
240
- label=i18n("主参考音频的文本"), value="", lines=2)
 
241
  with gr.Row():
242
  prompt_language = gr.Dropdown(
243
- label=i18n("主参考音频的语种"), choices=list(dict_language.keys()), value=i18n("中文")
 
 
244
  )
245
  with gr.Column():
246
- ref_text_free = gr.Checkbox(label=i18n(
247
- "开启无参考文本模式。不填参考文本亦相当于开启。"), value=False, interactive=True, show_label=True)
 
 
 
 
 
 
248
  gr.Markdown(
249
- i18n("使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。"))
 
 
 
250
 
251
  with gr.Column():
252
  gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
253
- text = gr.Textbox(label=i18n("需要合成的文本"),
254
- value="", lines=20, max_lines=20)
 
255
  text_language = gr.Dropdown(
256
- label=i18n("需要合成的文本的语种"), choices=list(dict_language.keys()), value=i18n("中文")
 
 
257
  )
258
 
259
  with gr.Group():
260
  gr.Markdown(value=i18n("推理设置"))
261
  with gr.Row():
262
-
263
  with gr.Column():
264
- batch_size = gr.Slider(minimum=1, maximum=200, step=1, label=i18n(
265
- "batch_size"), value=20, interactive=True)
266
- fragment_interval = gr.Slider(minimum=0.01, maximum=1, step=0.01, label=i18n(
267
- "分段间隔(秒)"), value=0.3, interactive=True)
 
 
 
 
 
 
 
 
 
 
 
 
268
  speed_factor = gr.Slider(
269
- minimum=0.6, maximum=1.65, step=0.05, label="speed_factor", value=1.0, interactive=True)
270
- top_k = gr.Slider(minimum=1, maximum=100, step=1, label=i18n(
271
- "top_k"), value=5, interactive=True)
272
- top_p = gr.Slider(minimum=0, maximum=1, step=0.05, label=i18n(
273
- "top_p"), value=1, interactive=True)
274
- temperature = gr.Slider(minimum=0, maximum=1, step=0.05, label=i18n(
275
- "temperature"), value=1, interactive=True)
276
- repetition_penalty = gr.Slider(minimum=0, maximum=2, step=0.05, label=i18n(
277
- "重复惩罚"), value=1.35, interactive=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  with gr.Column():
279
  with gr.Row():
280
  how_to_cut = gr.Dropdown(
281
  label=i18n("怎么切"),
282
- choices=[i18n("不切"), i18n("凑四句一切"), i18n("凑50字一切"), i18n(
283
- "按中文句号。切"), i18n("按英文句号.切"), i18n("按标点符号切"), ],
 
 
 
 
 
 
284
  value=i18n("凑四句一切"),
285
- interactive=True, scale=1
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  )
287
- parallel_infer = gr.Checkbox(label=i18n(
288
- "并行推理"), value=True, interactive=True, show_label=True)
289
- split_bucket = gr.Checkbox(label=i18n(
290
- "数据分桶(并行推理时会降低一点计算量)"), value=True, interactive=True, show_label=True)
291
 
292
  with gr.Row():
293
  seed = gr.Number(label=i18n("随机种子"), value=-1)
294
- keep_random = gr.Checkbox(label=i18n(
295
- "保持随机"), value=True, interactive=True, show_label=True)
 
 
 
 
296
 
297
  output = gr.Audio(label=i18n("输出的语音"))
298
  with gr.Row():
299
  inference_button = gr.Button(
300
- i18n("合成语音"), variant="primary")
 
301
  stop_infer = gr.Button(i18n("终止合成"), variant="primary")
302
 
303
  inference_button.click(
304
  inference,
305
  [
306
- text, text_language, inp_ref, inp_refs,
307
- prompt_text, prompt_language,
308
- top_k, top_p, temperature,
309
- how_to_cut, batch_size,
310
- speed_factor, ref_text_free,
311
- split_bucket, fragment_interval,
312
- seed, keep_random, parallel_infer,
313
- repetition_penalty
 
 
 
 
 
 
 
 
 
 
 
314
  ],
315
  [output, seed],
316
  )
317
  stop_infer.click(tts_pipeline.stop, [], [])
318
- SoVITS_dropdown.change(change_sovits_weights, [SoVITS_dropdown, prompt_language, text_language], [
319
- prompt_language, text_language, prompt_text, prompt_language, text, text_language])
320
- GPT_dropdown.change(
321
- tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
 
 
 
 
 
 
 
 
 
322
 
323
  # with gr.Group():
324
  # gr.Markdown(value=i18n(
@@ -348,7 +484,7 @@ async def create_app():
348
  return app
349
 
350
 
351
- if __name__ == '__main__':
352
  app = asyncio.run(create_app())
353
  app.launch(
354
  # server_name="0.0.0.0",
 
3
 
4
  ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1
5
  """
6
+
7
  import asyncio
8
  import logging
9
  import os
 
99
 
100
 
101
  @spaces.GPU
102
+ def inference(
103
+ text,
104
+ text_lang,
105
+ ref_audio_path,
106
+ aux_ref_audio_paths,
107
+ prompt_text,
108
+ prompt_lang,
109
+ top_k,
110
+ top_p,
111
+ temperature,
112
+ text_split_method,
113
+ batch_size,
114
+ speed_factor,
115
+ ref_text_free,
116
+ split_bucket,
117
+ fragment_interval,
118
+ seed,
119
+ keep_random,
120
+ parallel_infer,
121
+ repetition_penalty,
122
+ ):
123
  seed = -1 if keep_random else seed
124
+ actual_seed = seed if seed not in [-1, "", None] else random.randrange(1 << 32)
 
125
  inputs = {
126
  "text": text,
127
  "text_lang": dict_language[text_lang],
128
  "ref_audio_path": ref_audio_path,
129
+ "aux_ref_audio_paths": [item.name for item in aux_ref_audio_paths]
130
+ if aux_ref_audio_paths is not None
131
+ else [],
132
  "prompt_text": prompt_text if not ref_text_free else "",
133
  "prompt_lang": dict_language[prompt_lang],
134
  "top_k": top_k,
 
150
 
151
  def custom_sort_key(s):
152
  # 使用正则表达式提取字符串中的数字部分和非数字部分
153
+ parts = re.split("(\d+)", s)
154
  # 将数字部分转换为整数,非数字部分保持不变
155
  parts = [int(part) if part.isdigit() else part for part in parts]
156
  return parts
157
 
158
 
159
  def change_choices():
160
+ SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
161
+ return {
162
+ "choices": sorted(SoVITS_names, key=custom_sort_key),
163
+ "__type__": "update",
164
+ }, {"choices": sorted(GPT_names, key=custom_sort_key), "__type__": "update"}
165
 
166
 
167
  pretrained_sovits_name = [
168
+ "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s2G2333k.pth",
169
+ "GPT_SoVITS/pretrained_models/s2G488k.pth",
170
+ ]
171
+ pretrained_gpt_name = [
172
+ "GPT_SoVITS/pretrained_models/gsv-v2final-pretrained/s1bert25hz-5kh-longer-epoch=12-step=369668.ckpt",
173
+ "GPT_SoVITS/pretrained_models/s1bert25hz-2kh-longer-epoch=68e-step=50232.ckpt",
174
+ ]
175
  _ = [[], []]
176
  for i in range(2):
177
  if os.path.exists(pretrained_gpt_name[i]):
 
182
 
183
  SoVITS_weight_root = ["SoVITS_weights_v2", "SoVITS_weights"]
184
  GPT_weight_root = ["GPT_weights_v2", "GPT_weights"]
185
+ for path in SoVITS_weight_root + GPT_weight_root:
186
  os.makedirs(path, exist_ok=True)
187
 
188
 
 
200
  return SoVITS_names, GPT_names
201
 
202
 
203
+ SoVITS_names, GPT_names = get_weights_names(GPT_weight_root, SoVITS_weight_root)
 
204
 
205
 
206
  def change_sovits_weights(sovits_path, prompt_language=None, text_language=None):
 
209
  dict_language = dict_language_v2
210
  if prompt_language is not None and text_language is not None:
211
  if prompt_language in list(dict_language.keys()):
212
+ prompt_text_update, prompt_language_update = (
213
+ {"__type__": "update"},
214
+ {"__type__": "update", "value": prompt_language},
215
+ )
216
  else:
217
+ prompt_text_update = {"__type__": "update", "value": ""}
218
+ prompt_language_update = {"__type__": "update", "value": i18n("中文")}
 
219
  if text_language in list(dict_language.keys()):
220
+ text_update, text_language_update = (
221
+ {"__type__": "update"},
222
+ {"__type__": "update", "value": text_language},
223
+ )
224
  else:
225
+ text_update = {"__type__": "update", "value": ""}
226
+ text_language_update = {"__type__": "update", "value": i18n("中文")}
227
+ return (
228
+ {"__type__": "update", "choices": list(dict_language.keys())},
229
+ {"__type__": "update", "choices": list(dict_language.keys())},
230
+ prompt_text_update,
231
+ prompt_language_update,
232
+ text_update,
233
+ text_language_update,
234
+ )
235
 
236
 
237
  async def create_app():
 
246
  # with gr.Group():
247
  gr.Markdown(value=i18n("模型切换"))
248
  with gr.Row():
249
+ GPT_dropdown = gr.Dropdown(
250
+ label=i18n("GPT模型列表"),
251
+ choices=sorted(GPT_names, key=custom_sort_key),
252
+ value=gpt_path,
253
+ interactive=True,
254
+ )
255
+ SoVITS_dropdown = gr.Dropdown(
256
+ label=i18n("SoVITS模型列表"),
257
+ choices=sorted(SoVITS_names, key=custom_sort_key),
258
+ value=sovits_path,
259
+ interactive=True,
260
+ )
261
  refresh_button = gr.Button(i18n("刷新模型路径"), variant="primary")
262
+ refresh_button.click(
263
+ fn=change_choices,
264
+ inputs=[],
265
+ outputs=[SoVITS_dropdown, GPT_dropdown],
266
+ )
267
 
268
  with gr.Row():
269
  with gr.Column():
270
  gr.Markdown(value=i18n("*请上传并填写参考信息"))
271
  with gr.Row():
272
+ inp_ref = gr.Audio(
273
+ label="上傳 3-10 秒長嘅參考音頻", type="filepath"
274
+ )
275
+ inp_refs = gr.File(
276
+ label=i18n("辅参考音频(可选多个,或不选)"),
277
+ file_count="multiple",
278
+ )
279
  prompt_text = gr.Textbox(
280
+ label=i18n("主参考音频的文本"), value="", lines=2
281
+ )
282
  with gr.Row():
283
  prompt_language = gr.Dropdown(
284
+ label=i18n("主参考音频的语种"),
285
+ choices=list(dict_language.keys()),
286
+ value=i18n("中文"),
287
  )
288
  with gr.Column():
289
+ ref_text_free = gr.Checkbox(
290
+ label=i18n(
291
+ "开启无参考文本模式。不填参考文本亦相当于开启。"
292
+ ),
293
+ value=False,
294
+ interactive=True,
295
+ show_label=True,
296
+ )
297
  gr.Markdown(
298
+ i18n(
299
+ "使用无参考文本模式时建议使用微调的GPT,听不清参考音频说的啥(不晓得写啥)可以开,开启后无视填写的参考文本。"
300
+ )
301
+ )
302
 
303
  with gr.Column():
304
  gr.Markdown(value=i18n("*请填写需要合成的目标文本和语种模式"))
305
+ text = gr.Textbox(
306
+ label=i18n("需要合成的文本"), value="", lines=20, max_lines=20
307
+ )
308
  text_language = gr.Dropdown(
309
+ label=i18n("需要合成的文本的语种"),
310
+ choices=list(dict_language.keys()),
311
+ value=i18n("中文"),
312
  )
313
 
314
  with gr.Group():
315
  gr.Markdown(value=i18n("推理设置"))
316
  with gr.Row():
 
317
  with gr.Column():
318
+ batch_size = gr.Slider(
319
+ minimum=1,
320
+ maximum=200,
321
+ step=1,
322
+ label=i18n("batch_size"),
323
+ value=20,
324
+ interactive=True,
325
+ )
326
+ fragment_interval = gr.Slider(
327
+ minimum=0.01,
328
+ maximum=1,
329
+ step=0.01,
330
+ label=i18n("分段间隔(秒)"),
331
+ value=0.3,
332
+ interactive=True,
333
+ )
334
  speed_factor = gr.Slider(
335
+ minimum=0.6,
336
+ maximum=1.65,
337
+ step=0.05,
338
+ label="speed_factor",
339
+ value=1.0,
340
+ interactive=True,
341
+ )
342
+ top_k = gr.Slider(
343
+ minimum=1,
344
+ maximum=100,
345
+ step=1,
346
+ label=i18n("top_k"),
347
+ value=5,
348
+ interactive=True,
349
+ )
350
+ top_p = gr.Slider(
351
+ minimum=0,
352
+ maximum=1,
353
+ step=0.05,
354
+ label=i18n("top_p"),
355
+ value=1,
356
+ interactive=True,
357
+ )
358
+ temperature = gr.Slider(
359
+ minimum=0,
360
+ maximum=1,
361
+ step=0.05,
362
+ label=i18n("temperature"),
363
+ value=1,
364
+ interactive=True,
365
+ )
366
+ repetition_penalty = gr.Slider(
367
+ minimum=0,
368
+ maximum=2,
369
+ step=0.05,
370
+ label=i18n("重复惩罚"),
371
+ value=1.35,
372
+ interactive=True,
373
+ )
374
  with gr.Column():
375
  with gr.Row():
376
  how_to_cut = gr.Dropdown(
377
  label=i18n("怎么切"),
378
+ choices=[
379
+ i18n("不切"),
380
+ i18n("凑四句一切"),
381
+ i18n("凑50字一切"),
382
+ i18n("按中文句号。切"),
383
+ i18n("按英文句号.切"),
384
+ i18n("按标点符号切"),
385
+ ],
386
  value=i18n("凑四句一切"),
387
+ interactive=True,
388
+ scale=1,
389
+ )
390
+ parallel_infer = gr.Checkbox(
391
+ label=i18n("并行推理"),
392
+ value=True,
393
+ interactive=True,
394
+ show_label=True,
395
+ )
396
+ split_bucket = gr.Checkbox(
397
+ label=i18n("数据分桶(并行推理时会降低一点计算量)"),
398
+ value=True,
399
+ interactive=True,
400
+ show_label=True,
401
  )
 
 
 
 
402
 
403
  with gr.Row():
404
  seed = gr.Number(label=i18n("随机种子"), value=-1)
405
+ keep_random = gr.Checkbox(
406
+ label=i18n("保持随机"),
407
+ value=True,
408
+ interactive=True,
409
+ show_label=True,
410
+ )
411
 
412
  output = gr.Audio(label=i18n("输出的语音"))
413
  with gr.Row():
414
  inference_button = gr.Button(
415
+ i18n("合成语音"), variant="primary"
416
+ )
417
  stop_infer = gr.Button(i18n("终止合成"), variant="primary")
418
 
419
  inference_button.click(
420
  inference,
421
  [
422
+ text,
423
+ text_language,
424
+ inp_ref,
425
+ inp_refs,
426
+ prompt_text,
427
+ prompt_language,
428
+ top_k,
429
+ top_p,
430
+ temperature,
431
+ how_to_cut,
432
+ batch_size,
433
+ speed_factor,
434
+ ref_text_free,
435
+ split_bucket,
436
+ fragment_interval,
437
+ seed,
438
+ keep_random,
439
+ parallel_infer,
440
+ repetition_penalty,
441
  ],
442
  [output, seed],
443
  )
444
  stop_infer.click(tts_pipeline.stop, [], [])
445
+ SoVITS_dropdown.change(
446
+ change_sovits_weights,
447
+ [SoVITS_dropdown, prompt_language, text_language],
448
+ [
449
+ prompt_language,
450
+ text_language,
451
+ prompt_text,
452
+ prompt_language,
453
+ text,
454
+ text_language,
455
+ ],
456
+ )
457
+ GPT_dropdown.change(tts_pipeline.init_t2s_weights, [GPT_dropdown], [])
458
 
459
  # with gr.Group():
460
  # gr.Markdown(value=i18n(
 
484
  return app
485
 
486
 
487
+ if __name__ == "__main__":
488
  app = asyncio.run(create_app())
489
  app.launch(
490
  # server_name="0.0.0.0",