Bark-UI-with-Voice-Cloning-2

Paused

App Files Files Community

kevinwang676 committed on May 10, 2023

Commit

52f3cb0

•

1 Parent(s): 5580fe1

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -51

app.py CHANGED Viewed

@@ -14,6 +14,15 @@ import torch
 import pytorch_seed
 import time
 from xml.sax import saxutils
 from bark.api import generate_with_settings
 from bark.api import save_as_prompt
@@ -60,6 +69,14 @@ import subprocess
 OUTPUTFOLDER = "Outputs"
 def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, eos_prob, quick_generation, complete_settings, seed, progress=gr.Progress(track_tqdm=True)):
     if text == None or len(text) < 1:
@@ -429,55 +446,35 @@ while run_server:
     # Create Gradio Blocks
     with gr.Blocks(title=f"{APPTITLE}", mode=f"{APPTITLE}", theme=settings.selected_theme) as barkgui:
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown(f"### [{APPTITLE}](https://github.com/C0untFloyd/bark-gui)")
-            with gr.Column():
-                gr.HTML(create_version_html(), elem_id="versions")
-        with gr.Tab("TTS"):
             with gr.Row():
                 with gr.Column():
-                    placeholder = "Enter text here."
-                    input_text = gr.Textbox(label="Input Text", lines=4, placeholder=placeholder)
                 with gr.Column():
                     seedcomponent = gr.Number(label="Seed (default -1 = Random)", precision=0, value=-1)
                     convert_to_ssml_button = gr.Button("Convert Text to SSML")
-            with gr.Row():
-                with gr.Column():
-                    examples = [
-                        "Special meanings: [laughter] [laughs] [sighs] [music] [gasps] [clears throat] MAN: WOMAN:",
-                       "♪ Never gonna make you cry, never gonna say goodbye, never gonna tell a lie and hurt you ♪",
-                       "And now — a picture of a larch [laughter]",
-                       """
-                            WOMAN: I would like an oatmilk latte please.
-                            MAN: Wow, that's expensive!
-                       """,
-                       """<?xml version="1.0"?>
-    <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
-             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-             xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
-                       http://www.w3.org/TR/speech-synthesis/synthesis.xsd"
-             xml:lang="en-US">
-    <voice name="en_speaker_9">Look at that drunk guy!</voice>
-    <voice name="en_speaker_3">Who is he?</voice>
-    <voice name="en_speaker_9">WOMAN: [clears throat] 10 years ago, he proposed me and I rejected him.</voice>
-    <voice name="en_speaker_3">Oh my God [laughs] he is still celebrating</voice>
-    </speak>"""
-                       ]
-                    examples = gr.Examples(examples=examples, inputs=input_text)
             with gr.Row():
                 with gr.Column():
-                    gr.Markdown("[Voice Prompt Library](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c)")
-                    speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="Voice")
                 with gr.Column():
-                    text_temp = gr.Slider(0.1, 1.0, value=0.6, label="Generation Temperature", info="1.0 more diverse, 0.1 more conservative")
                     waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
             with gr.Row():
                 with gr.Column():
-                    quick_gen_checkbox = gr.Checkbox(label="Quick Generation", value=True)
                     settings_checkboxes = ["Use last generation as history", "Save generation as Voice"]
                     complete_settings = gr.CheckboxGroup(choices=settings_checkboxes, value=settings_checkboxes, label="Detailed Generation Settings", type="value", interactive=True, visible=False)
                 with gr.Column():
@@ -485,32 +482,54 @@ while run_server:
             with gr.Row():
                 with gr.Column():
-                    tts_create_button = gr.Button("Generate")
                 with gr.Column():
                     hidden_checkbox = gr.Checkbox(visible=False)
-                    button_stop_generation = gr.Button("Stop generation")
             with gr.Row():
-                output_audio = gr.Audio(label="Generated Audio")
             with gr.Row():
-                inp1 = gr.Audio(label='Target Speaker - Reference Clip')
                 inp2 = output_audio
                 inp3 = output_audio
-                btn = gr.Button("Generate")
-                out1 = gr.Audio(label='Target Speaker - Converted Clip')
             btn.click(voice_conversion, [inp1, inp2, inp3], [out1])
-        with gr.Tab("Clone Voice"):
-            input_audio_filename = gr.Audio(label="Input audio.wav", source="upload", type="filepath")
-            transcription_text = gr.Textbox(label="Transcription Text", lines=1, placeholder="Enter Text of your Audio Sample here...")
-            initialname = "./bark/assets/prompts/custom/MeMyselfAndI"
-            output_voice = gr.Textbox(label="Filename of trained Voice", lines=1, placeholder=initialname, value=initialname)
-            clone_voice_button = gr.Button("Create Voice")
-            dummy = gr.Text(label="Progress")
-        with gr.Tab("Settings"):
             with gr.Row():
                 themes = gr.Dropdown(available_themes, label="Theme", info="Change needs complete restart", value=settings.selected_theme)
             with gr.Row():
@@ -529,6 +548,13 @@ while run_server:
                 button_apply_restart = gr.Button("Restart Server")
                 button_delete_files = gr.Button("Clear output folder")
         quick_gen_checkbox.change(fn=on_quick_gen_changed, inputs=quick_gen_checkbox, outputs=complete_settings)
         convert_to_ssml_button.click(convert_text_to_ssml, inputs=[input_text, speaker],outputs=input_text)
         gen_click = tts_create_button.click(generate_text_to_speech, inputs=[input_text, speaker, text_temp, waveform_temp, eos_prob, quick_gen_checkbox, complete_settings, seedcomponent],outputs=output_audio)

 import pytorch_seed
 import time
+import torchaudio
+from speechbrain.pretrained import SpectralMaskEnhancement
+enhance_model = SpectralMaskEnhancement.from_hparams(
+    source="speechbrain/metricgan-plus-voicebank",
+    savedir="pretrained_models/metricgan-plus-voicebank",
+    run_opts={"device":"cuda"},
+)
 from xml.sax import saxutils
 from bark.api import generate_with_settings
 from bark.api import save_as_prompt
 OUTPUTFOLDER = "Outputs"
+def speechbrain(aud):
+  # Load and add fake batch dimension
+  noisy = enhance_model.load_audio(
+      aud
+  ).unsqueeze(0)
+  enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+  torchaudio.save('enhanced.wav', enhanced.cpu(), 16000)
+  return 'enhanced.wav'
 def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, eos_prob, quick_generation, complete_settings, seed, progress=gr.Progress(track_tqdm=True)):
     if text == None or len(text) < 1:
     # Create Gradio Blocks
     with gr.Blocks(title=f"{APPTITLE}", mode=f"{APPTITLE}", theme=settings.selected_theme) as barkgui:
+        gr.Markdown("# <center>🐶🥳🎶 - Bark拟声最新版，开启声音真实复刻的新纪元！</center>")
+        gr.Markdown("### <center>🦄 - [Bark](https://github.com/suno-ai/bark)拟声，能够实现语音、语调及说话情感的真实复刻</center>")
+        gr.Markdown(
+                f"""
+                ### <center>🤗 - Powered by [Bark Enhanced](https://github.com/C0untFloyd/bark-gui). Thanks to C0untFloyd.</center>
+                ### <center>1. 您可以复制该程序并用GPU运行: <a href="https://huggingface.co/spaces/{os.getenv('SPACE_ID')}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></center>
+                ### <center>2. 更多精彩应用，敬请关注[滔滔AI](http://www.talktalkai.com)；滔滔AI，为爱滔滔！💕</center>
+            """
+        )
+        with gr.Tab("🐶 - Bark拟声"):
             with gr.Row():
                 with gr.Column():
+                    placeholder = "想让Bark说些什么呢？"
+                    input_text = gr.Textbox(label="用作声音合成的文本", lines=4, placeholder=placeholder)
                 with gr.Column():
                     seedcomponent = gr.Number(label="Seed (default -1 = Random)", precision=0, value=-1)
                     convert_to_ssml_button = gr.Button("Convert Text to SSML")
             with gr.Row():
                 with gr.Column():
+                    gr.Markdown("查看Bark官方[语言库](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c)")
+                    speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="中英双语的不同声音供您选择")
                 with gr.Column():
+                    text_temp = gr.Slider(0.1, 1.0, value=0.7, label="Generation Temperature", info="1.0 more diverse, 0.1 more conservative")
                     waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
             with gr.Row():
                 with gr.Column():
+                    quick_gen_checkbox = gr.Checkbox(label="是否要快速合成语音", value=True)
                     settings_checkboxes = ["Use last generation as history", "Save generation as Voice"]
                     complete_settings = gr.CheckboxGroup(choices=settings_checkboxes, value=settings_checkboxes, label="Detailed Generation Settings", type="value", interactive=True, visible=False)
                 with gr.Column():
             with gr.Row():
                 with gr.Column():
+                    tts_create_button = gr.Button("开始声音真实复刻吧")
                 with gr.Column():
                     hidden_checkbox = gr.Checkbox(visible=False)
+                    button_stop_generation = gr.Button("停止生成")
             with gr.Row():
+                output_audio = gr.Audio(label="真实复刻的声音")
             with gr.Row():
+                inp1 = gr.Audio(label="请上传您喜欢的声音")
                 inp2 = output_audio
                 inp3 = output_audio
+                btn = gr.Button("开始生成专属声音吧")
+                out1 = gr.Audio(label="为你生成的专属声音")
             btn.click(voice_conversion, [inp1, inp2, inp3], [out1])
+            with gr.Row():
+                inp4 = out1
+                btn2 = gr.Button("对专属声音降噪吧")
+                out2 = gr.Audio(label="降噪后的专属声音")
+            btn2.click(speechbrain, [inp4], [out2])
+            with gr.Row():
+                with gr.Column():
+                    examples = [
+                        "Special meanings: [laughter] [laughs] [sighs] [music] [gasps] [clears throat] MAN: WOMAN:",
+                       "♪ Never gonna make you cry, never gonna say goodbye, never gonna tell a lie and hurt you ♪",
+                       "And now — a picture of a larch [laughter]",
+                       """
+                            WOMAN: I would like an oatmilk latte please.
+                            MAN: Wow, that's expensive!
+                       """,
+                       """<?xml version="1.0"?>
+    <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
+             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+             xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
+                       http://www.w3.org/TR/speech-synthesis/synthesis.xsd"
+             xml:lang="en-US">
+    <voice name="en_speaker_9">Look at that drunk guy!</voice>
+    <voice name="en_speaker_3">Who is he?</voice>
+    <voice name="en_speaker_9">WOMAN: [clears throat] 10 years ago, he proposed me and I rejected him.</voice>
+    <voice name="en_speaker_3">Oh my God [laughs] he is still celebrating</voice>
+    </speak>"""
+                       ]
+                    examples = gr.Examples(examples=examples, inputs=input_text)
+        with gr.Tab("🤖 - 设置"):
             with gr.Row():
                 themes = gr.Dropdown(available_themes, label="Theme", info="Change needs complete restart", value=settings.selected_theme)
             with gr.Row():
                 button_apply_restart = gr.Button("Restart Server")
                 button_delete_files = gr.Button("Clear output folder")
+        gr.HTML('''
+            <div class="footer">
+                        <p>🌊🏞️🎶 - 江水东流急，滔滔无尽声。 明·顾璘
+                        </p>
+            </div>
+        ''')
         quick_gen_checkbox.change(fn=on_quick_gen_changed, inputs=quick_gen_checkbox, outputs=complete_settings)
         convert_to_ssml_button.click(convert_text_to_ssml, inputs=[input_text, speaker],outputs=input_text)
         gen_click = tts_create_button.click(generate_text_to_speech, inputs=[input_text, speaker, text_temp, waveform_temp, eos_prob, quick_gen_checkbox, complete_settings, seedcomponent],outputs=output_audio)