kevinwang676
committed on
Commit
•
52f3cb0
1
Parent(s):
5580fe1
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,15 @@ import torch
|
|
14 |
import pytorch_seed
|
15 |
import time
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
from xml.sax import saxutils
|
18 |
from bark.api import generate_with_settings
|
19 |
from bark.api import save_as_prompt
|
@@ -60,6 +69,14 @@ import subprocess
|
|
60 |
|
61 |
OUTPUTFOLDER = "Outputs"
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, eos_prob, quick_generation, complete_settings, seed, progress=gr.Progress(track_tqdm=True)):
|
65 |
if text == None or len(text) < 1:
|
@@ -429,55 +446,35 @@ while run_server:
|
|
429 |
# Create Gradio Blocks
|
430 |
|
431 |
with gr.Blocks(title=f"{APPTITLE}", mode=f"{APPTITLE}", theme=settings.selected_theme) as barkgui:
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
|
|
|
|
|
|
439 |
with gr.Row():
|
440 |
with gr.Column():
|
441 |
-
placeholder = "
|
442 |
-
input_text = gr.Textbox(label="
|
443 |
with gr.Column():
|
444 |
seedcomponent = gr.Number(label="Seed (default -1 = Random)", precision=0, value=-1)
|
445 |
convert_to_ssml_button = gr.Button("Convert Text to SSML")
|
446 |
-
with gr.Row():
|
447 |
-
with gr.Column():
|
448 |
-
examples = [
|
449 |
-
"Special meanings: [laughter] [laughs] [sighs] [music] [gasps] [clears throat] MAN: WOMAN:",
|
450 |
-
"♪ Never gonna make you cry, never gonna say goodbye, never gonna tell a lie and hurt you ♪",
|
451 |
-
"And now — a picture of a larch [laughter]",
|
452 |
-
"""
|
453 |
-
WOMAN: I would like an oatmilk latte please.
|
454 |
-
MAN: Wow, that's expensive!
|
455 |
-
""",
|
456 |
-
"""<?xml version="1.0"?>
|
457 |
-
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
|
458 |
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
459 |
-
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
|
460 |
-
http://www.w3.org/TR/speech-synthesis/synthesis.xsd"
|
461 |
-
xml:lang="en-US">
|
462 |
-
<voice name="en_speaker_9">Look at that drunk guy!</voice>
|
463 |
-
<voice name="en_speaker_3">Who is he?</voice>
|
464 |
-
<voice name="en_speaker_9">WOMAN: [clears throat] 10 years ago, he proposed me and I rejected him.</voice>
|
465 |
-
<voice name="en_speaker_3">Oh my God [laughs] he is still celebrating</voice>
|
466 |
-
</speak>"""
|
467 |
-
]
|
468 |
-
examples = gr.Examples(examples=examples, inputs=input_text)
|
469 |
|
470 |
with gr.Row():
|
471 |
with gr.Column():
|
472 |
-
gr.Markdown("[
|
473 |
-
speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="
|
474 |
with gr.Column():
|
475 |
-
text_temp = gr.Slider(0.1, 1.0, value=0.
|
476 |
waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
|
477 |
|
478 |
with gr.Row():
|
479 |
with gr.Column():
|
480 |
-
quick_gen_checkbox = gr.Checkbox(label="
|
481 |
settings_checkboxes = ["Use last generation as history", "Save generation as Voice"]
|
482 |
complete_settings = gr.CheckboxGroup(choices=settings_checkboxes, value=settings_checkboxes, label="Detailed Generation Settings", type="value", interactive=True, visible=False)
|
483 |
with gr.Column():
|
@@ -485,32 +482,54 @@ while run_server:
|
|
485 |
|
486 |
with gr.Row():
|
487 |
with gr.Column():
|
488 |
-
tts_create_button = gr.Button("
|
489 |
with gr.Column():
|
490 |
hidden_checkbox = gr.Checkbox(visible=False)
|
491 |
-
button_stop_generation = gr.Button("
|
492 |
with gr.Row():
|
493 |
-
output_audio = gr.Audio(label="
|
494 |
|
495 |
with gr.Row():
|
496 |
-
inp1 = gr.Audio(label=
|
497 |
inp2 = output_audio
|
498 |
inp3 = output_audio
|
499 |
-
btn = gr.Button("
|
500 |
-
out1 = gr.Audio(label=
|
501 |
btn.click(voice_conversion, [inp1, inp2, inp3], [out1])
|
502 |
-
|
503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
504 |
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
512 |
|
513 |
-
with gr.Tab("
|
514 |
with gr.Row():
|
515 |
themes = gr.Dropdown(available_themes, label="Theme", info="Change needs complete restart", value=settings.selected_theme)
|
516 |
with gr.Row():
|
@@ -529,6 +548,13 @@ while run_server:
|
|
529 |
button_apply_restart = gr.Button("Restart Server")
|
530 |
button_delete_files = gr.Button("Clear output folder")
|
531 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
quick_gen_checkbox.change(fn=on_quick_gen_changed, inputs=quick_gen_checkbox, outputs=complete_settings)
|
533 |
convert_to_ssml_button.click(convert_text_to_ssml, inputs=[input_text, speaker],outputs=input_text)
|
534 |
gen_click = tts_create_button.click(generate_text_to_speech, inputs=[input_text, speaker, text_temp, waveform_temp, eos_prob, quick_gen_checkbox, complete_settings, seedcomponent],outputs=output_audio)
|
|
|
14 |
import pytorch_seed
|
15 |
import time
|
16 |
|
17 |
+
import torchaudio
|
18 |
+
from speechbrain.pretrained import SpectralMaskEnhancement
|
19 |
+
|
20 |
+
# Speech-enhancement model used by the denoising callback; built once at
# import time from the "speechbrain/metricgan-plus-voicebank" source, with
# "pretrained_models/metricgan-plus-voicebank" passed as its save directory.
# The device falls back to the CPU when CUDA is not available so that the
# application can still start on a machine without a GPU.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
    run_opts={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)
|
25 |
+
|
26 |
from xml.sax import saxutils
|
27 |
from bark.api import generate_with_settings
|
28 |
from bark.api import save_as_prompt
|
|
|
69 |
|
70 |
OUTPUTFOLDER = "Outputs"
|
71 |
|
72 |
+
def speechbrain(aud):
    """Denoise audio with the module-level ``enhance_model``.

    Args:
        aud: Value handed to ``enhance_model.load_audio``; ``None`` means no
            audio was supplied.
            NOTE(review): presumably a file path -- confirm that the Gradio
            audio component feeding this callback delivers a path and not
            raw samples.

    Returns:
        Path of the WAV file holding the enhanced audio, or ``None`` when
        ``aud`` is ``None``.
    """
    import os
    import tempfile

    # Nothing to enhance (e.g. the button was pressed before any audio
    # existed): return an empty result instead of failing in load_audio.
    if aud is None:
        return None

    # Load and add fake batch dimension
    noisy = enhance_model.load_audio(aud).unsqueeze(0)
    enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))

    # Write each result to its own file: a single shared 'enhanced.wav'
    # would be overwritten by overlapping requests. The file is not removed
    # here because the caller still needs to read it after we return.
    fd, out_path = tempfile.mkstemp(prefix="enhanced_", suffix=".wav")
    os.close(fd)
    torchaudio.save(out_path, enhanced.cpu(), 16000)
    return out_path
|
80 |
|
81 |
def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, eos_prob, quick_generation, complete_settings, seed, progress=gr.Progress(track_tqdm=True)):
|
82 |
if text == None or len(text) < 1:
|
|
|
446 |
# Create Gradio Blocks
|
447 |
|
448 |
with gr.Blocks(title=f"{APPTITLE}", mode=f"{APPTITLE}", theme=settings.selected_theme) as barkgui:
|
449 |
+
gr.Markdown("# <center>🐶🥳🎶 - Bark拟声最新版,开启声音真实复刻的新纪元!</center>")
|
450 |
+
gr.Markdown("### <center>🦄 - [Bark](https://github.com/suno-ai/bark)拟声,能够实现语音、语调及说话情感的真实复刻</center>")
|
451 |
+
# Intro banner shown under the page title. The "Bark Enhanced" credit needs
# the closing "]" after the link text; without it Markdown renders the raw
# brackets and URL instead of a clickable link.
gr.Markdown(
    f"""
### <center>🤗 - Powered by [Bark Enhanced](https://github.com/C0untFloyd/bark-gui). Thanks to C0untFloyd.</center>
### <center>1. 您可以复制该程序并用GPU运行: <a href="https://huggingface.co/spaces/{os.getenv('SPACE_ID')}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></center>
### <center>2. 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>
"""
)
|
458 |
+
with gr.Tab("🐶 - Bark拟声"):
|
459 |
with gr.Row():
|
460 |
with gr.Column():
|
461 |
+
placeholder = "想让Bark说些什么呢?"
|
462 |
+
input_text = gr.Textbox(label="用作声音合成的文本", lines=4, placeholder=placeholder)
|
463 |
with gr.Column():
|
464 |
seedcomponent = gr.Number(label="Seed (default -1 = Random)", precision=0, value=-1)
|
465 |
convert_to_ssml_button = gr.Button("Convert Text to SSML")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
|
467 |
with gr.Row():
|
468 |
with gr.Column():
|
469 |
+
gr.Markdown("查看Bark官方[语言库](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c)")
|
470 |
+
speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="中英双语的不同声音供您选择")
|
471 |
with gr.Column():
|
472 |
+
text_temp = gr.Slider(0.1, 1.0, value=0.7, label="Generation Temperature", info="1.0 more diverse, 0.1 more conservative")
|
473 |
waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
|
474 |
|
475 |
with gr.Row():
|
476 |
with gr.Column():
|
477 |
+
quick_gen_checkbox = gr.Checkbox(label="是否要快速合成语音", value=True)
|
478 |
settings_checkboxes = ["Use last generation as history", "Save generation as Voice"]
|
479 |
complete_settings = gr.CheckboxGroup(choices=settings_checkboxes, value=settings_checkboxes, label="Detailed Generation Settings", type="value", interactive=True, visible=False)
|
480 |
with gr.Column():
|
|
|
482 |
|
483 |
with gr.Row():
|
484 |
with gr.Column():
|
485 |
+
tts_create_button = gr.Button("开始声音真实复刻吧")
|
486 |
with gr.Column():
|
487 |
hidden_checkbox = gr.Checkbox(visible=False)
|
488 |
+
button_stop_generation = gr.Button("停止生成")
|
489 |
with gr.Row():
|
490 |
+
output_audio = gr.Audio(label="真实复刻的声音")
|
491 |
|
492 |
with gr.Row():
|
493 |
+
inp1 = gr.Audio(label="请上传您喜欢的声音")
|
494 |
inp2 = output_audio
|
495 |
inp3 = output_audio
|
496 |
+
btn = gr.Button("开始生成专属声音吧")
|
497 |
+
out1 = gr.Audio(label="为你生成的专属声音")
|
498 |
btn.click(voice_conversion, [inp1, inp2, inp3], [out1])
|
|
|
499 |
|
500 |
+
with gr.Row():
|
501 |
+
inp4 = out1
|
502 |
+
btn2 = gr.Button("对专属声音降噪吧")
|
503 |
+
out2 = gr.Audio(label="降噪后的专属声音")
|
504 |
+
btn2.click(speechbrain, [inp4], [out2])
|
505 |
+
|
506 |
+
|
507 |
|
508 |
+
with gr.Row():
|
509 |
+
with gr.Column():
|
510 |
+
examples = [
|
511 |
+
"Special meanings: [laughter] [laughs] [sighs] [music] [gasps] [clears throat] MAN: WOMAN:",
|
512 |
+
"♪ Never gonna make you cry, never gonna say goodbye, never gonna tell a lie and hurt you ♪",
|
513 |
+
"And now — a picture of a larch [laughter]",
|
514 |
+
"""
|
515 |
+
WOMAN: I would like an oatmilk latte please.
|
516 |
+
MAN: Wow, that's expensive!
|
517 |
+
""",
|
518 |
+
"""<?xml version="1.0"?>
|
519 |
+
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
|
520 |
+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
521 |
+
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
|
522 |
+
http://www.w3.org/TR/speech-synthesis/synthesis.xsd"
|
523 |
+
xml:lang="en-US">
|
524 |
+
<voice name="en_speaker_9">Look at that drunk guy!</voice>
|
525 |
+
<voice name="en_speaker_3">Who is he?</voice>
|
526 |
+
<voice name="en_speaker_9">WOMAN: [clears throat] 10 years ago, he proposed me and I rejected him.</voice>
|
527 |
+
<voice name="en_speaker_3">Oh my God [laughs] he is still celebrating</voice>
|
528 |
+
</speak>"""
|
529 |
+
]
|
530 |
+
examples = gr.Examples(examples=examples, inputs=input_text)
|
531 |
|
532 |
+
with gr.Tab("🤖 - 设置"):
|
533 |
with gr.Row():
|
534 |
themes = gr.Dropdown(available_themes, label="Theme", info="Change needs complete restart", value=settings.selected_theme)
|
535 |
with gr.Row():
|
|
|
548 |
button_apply_restart = gr.Button("Restart Server")
|
549 |
button_delete_files = gr.Button("Clear output folder")
|
550 |
|
551 |
+
gr.HTML('''
|
552 |
+
<div class="footer">
|
553 |
+
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
|
554 |
+
</p>
|
555 |
+
</div>
|
556 |
+
''')
|
557 |
+
|
558 |
quick_gen_checkbox.change(fn=on_quick_gen_changed, inputs=quick_gen_checkbox, outputs=complete_settings)
|
559 |
convert_to_ssml_button.click(convert_text_to_ssml, inputs=[input_text, speaker],outputs=input_text)
|
560 |
gen_click = tts_create_button.click(generate_text_to_speech, inputs=[input_text, speaker, text_temp, waveform_temp, eos_prob, quick_gen_checkbox, complete_settings, seedcomponent],outputs=output_audio)
|