kevinwang676 committed on
Commit
52f3cb0
1 Parent(s): 5580fe1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -51
app.py CHANGED
@@ -14,6 +14,15 @@ import torch
14
  import pytorch_seed
15
  import time
16
 
 
 
 
 
 
 
 
 
 
17
  from xml.sax import saxutils
18
  from bark.api import generate_with_settings
19
  from bark.api import save_as_prompt
@@ -60,6 +69,14 @@ import subprocess
60
 
61
  OUTPUTFOLDER = "Outputs"
62
 
 
 
 
 
 
 
 
 
63
 
64
  def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, eos_prob, quick_generation, complete_settings, seed, progress=gr.Progress(track_tqdm=True)):
65
  if text == None or len(text) < 1:
@@ -429,55 +446,35 @@ while run_server:
429
  # Create Gradio Blocks
430
 
431
  with gr.Blocks(title=f"{APPTITLE}", mode=f"{APPTITLE}", theme=settings.selected_theme) as barkgui:
432
- with gr.Row():
433
- with gr.Column():
434
- gr.Markdown(f"### [{APPTITLE}](https://github.com/C0untFloyd/bark-gui)")
435
- with gr.Column():
436
- gr.HTML(create_version_html(), elem_id="versions")
437
-
438
- with gr.Tab("TTS"):
 
 
 
439
  with gr.Row():
440
  with gr.Column():
441
- placeholder = "Enter text here."
442
- input_text = gr.Textbox(label="Input Text", lines=4, placeholder=placeholder)
443
  with gr.Column():
444
  seedcomponent = gr.Number(label="Seed (default -1 = Random)", precision=0, value=-1)
445
  convert_to_ssml_button = gr.Button("Convert Text to SSML")
446
- with gr.Row():
447
- with gr.Column():
448
- examples = [
449
- "Special meanings: [laughter] [laughs] [sighs] [music] [gasps] [clears throat] MAN: WOMAN:",
450
- "♪ Never gonna make you cry, never gonna say goodbye, never gonna tell a lie and hurt you ♪",
451
- "And now — a picture of a larch [laughter]",
452
- """
453
- WOMAN: I would like an oatmilk latte please.
454
- MAN: Wow, that's expensive!
455
- """,
456
- """<?xml version="1.0"?>
457
- <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
458
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
459
- xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
460
- http://www.w3.org/TR/speech-synthesis/synthesis.xsd"
461
- xml:lang="en-US">
462
- <voice name="en_speaker_9">Look at that drunk guy!</voice>
463
- <voice name="en_speaker_3">Who is he?</voice>
464
- <voice name="en_speaker_9">WOMAN: [clears throat] 10 years ago, he proposed me and I rejected him.</voice>
465
- <voice name="en_speaker_3">Oh my God [laughs] he is still celebrating</voice>
466
- </speak>"""
467
- ]
468
- examples = gr.Examples(examples=examples, inputs=input_text)
469
 
470
  with gr.Row():
471
  with gr.Column():
472
- gr.Markdown("[Voice Prompt Library](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c)")
473
- speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="Voice")
474
  with gr.Column():
475
- text_temp = gr.Slider(0.1, 1.0, value=0.6, label="Generation Temperature", info="1.0 more diverse, 0.1 more conservative")
476
  waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
477
 
478
  with gr.Row():
479
  with gr.Column():
480
- quick_gen_checkbox = gr.Checkbox(label="Quick Generation", value=True)
481
  settings_checkboxes = ["Use last generation as history", "Save generation as Voice"]
482
  complete_settings = gr.CheckboxGroup(choices=settings_checkboxes, value=settings_checkboxes, label="Detailed Generation Settings", type="value", interactive=True, visible=False)
483
  with gr.Column():
@@ -485,32 +482,54 @@ while run_server:
485
 
486
  with gr.Row():
487
  with gr.Column():
488
- tts_create_button = gr.Button("Generate")
489
  with gr.Column():
490
  hidden_checkbox = gr.Checkbox(visible=False)
491
- button_stop_generation = gr.Button("Stop generation")
492
  with gr.Row():
493
- output_audio = gr.Audio(label="Generated Audio")
494
 
495
  with gr.Row():
496
- inp1 = gr.Audio(label='Target Speaker - Reference Clip')
497
  inp2 = output_audio
498
  inp3 = output_audio
499
- btn = gr.Button("Generate")
500
- out1 = gr.Audio(label='Target Speaker - Converted Clip')
501
  btn.click(voice_conversion, [inp1, inp2, inp3], [out1])
502
-
503
 
 
 
 
 
 
 
 
504
 
505
- with gr.Tab("Clone Voice"):
506
- input_audio_filename = gr.Audio(label="Input audio.wav", source="upload", type="filepath")
507
- transcription_text = gr.Textbox(label="Transcription Text", lines=1, placeholder="Enter Text of your Audio Sample here...")
508
- initialname = "./bark/assets/prompts/custom/MeMyselfAndI"
509
- output_voice = gr.Textbox(label="Filename of trained Voice", lines=1, placeholder=initialname, value=initialname)
510
- clone_voice_button = gr.Button("Create Voice")
511
- dummy = gr.Text(label="Progress")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
 
513
- with gr.Tab("Settings"):
514
  with gr.Row():
515
  themes = gr.Dropdown(available_themes, label="Theme", info="Change needs complete restart", value=settings.selected_theme)
516
  with gr.Row():
@@ -529,6 +548,13 @@ while run_server:
529
  button_apply_restart = gr.Button("Restart Server")
530
  button_delete_files = gr.Button("Clear output folder")
531
 
 
 
 
 
 
 
 
532
  quick_gen_checkbox.change(fn=on_quick_gen_changed, inputs=quick_gen_checkbox, outputs=complete_settings)
533
  convert_to_ssml_button.click(convert_text_to_ssml, inputs=[input_text, speaker],outputs=input_text)
534
  gen_click = tts_create_button.click(generate_text_to_speech, inputs=[input_text, speaker, text_temp, waveform_temp, eos_prob, quick_gen_checkbox, complete_settings, seedcomponent],outputs=output_audio)
 
14
  import pytorch_seed
15
  import time
16
 
17
+ import torchaudio
18
+ from speechbrain.pretrained import SpectralMaskEnhancement
19
+
20
# Speech-enhancement model used by the denoising step; loaded once at import
# time. Run on the GPU when one is available, otherwise fall back to the CPU
# so the app can still start on a CPU-only host instead of failing while the
# model is being loaded.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="pretrained_models/metricgan-plus-voicebank",
    run_opts={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)
25
+
26
  from xml.sax import saxutils
27
  from bark.api import generate_with_settings
28
  from bark.api import save_as_prompt
 
69
 
70
  OUTPUTFOLDER = "Outputs"
71
 
72
def speechbrain(aud):
    """Denoise a speech clip with ``enhance_model`` and return a WAV file path.

    Args:
        aud: Either a path to an audio file, or a ``(sample_rate, samples)``
            pair. ``None`` (nothing to enhance yet) is passed straight through.
            NOTE(review): the pair form is what a Gradio ``Audio`` input is
            expected to deliver in its default mode -- confirm against the
            Gradio version in use.

    Returns:
        Path of a newly written 16 kHz WAV file holding the enhanced audio,
        or ``None`` when ``aud`` is ``None``.
    """
    if aud is None:
        return None

    import tempfile  # local import keeps this block self-contained

    # Rate the enhanced clip is saved at (unchanged from the original code).
    # NOTE(review): presumably also the rate the model expects -- confirm.
    sample_rate = 16000

    if isinstance(aud, (tuple, list)):
        source_rate, samples = aud
        wav = torch.as_tensor(samples)
        if wav.is_floating_point():
            wav = wav.float()
        else:
            # Integer PCM: scale into roughly [-1, 1] before enhancement.
            scale = float(torch.iinfo(wav.dtype).max)
            wav = wav.float() / scale
        if wav.dim() > 1:
            # assumes a (samples, channels) layout -- TODO confirm; mix to mono
            wav = wav.mean(dim=1)
        if source_rate != sample_rate:
            wav = torchaudio.functional.resample(wav, source_rate, sample_rate)
    else:
        wav = enhance_model.load_audio(aud)

    # Add a fake batch dimension; a relative length of 1.0 means "use it all".
    noisy = wav.unsqueeze(0)
    with torch.no_grad():
        enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.0]))

    # Write to a unique temp file rather than a fixed 'enhanced.wav' so that
    # concurrent requests cannot overwrite each other's result.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        out_path = handle.name
    torchaudio.save(out_path, enhanced.detach().cpu(), sample_rate)
    return out_path
80
 
81
  def generate_text_to_speech(text, selected_speaker, text_temp, waveform_temp, eos_prob, quick_generation, complete_settings, seed, progress=gr.Progress(track_tqdm=True)):
82
  if text == None or len(text) < 1:
 
446
  # Create Gradio Blocks
447
 
448
  with gr.Blocks(title=f"{APPTITLE}", mode=f"{APPTITLE}", theme=settings.selected_theme) as barkgui:
449
+ gr.Markdown("# <center>🐶🥳🎶 - Bark拟声最新版,开启声音真实复刻的新纪元!</center>")
450
+ gr.Markdown("### <center>🦄 - [Bark](https://github.com/suno-ai/bark)拟声,能够实现语音、语调及说话情感的真实复刻</center>")
451
+ gr.Markdown(
452
+ f"""
453
+ ### <center>🤗 - Powered by [Bark Enhanced](https://github.com/C0untFloyd/bark-gui). Thanks to C0untFloyd.</center>
454
+ ### <center>1. 您可以复制该程序并用GPU运行: <a href="https://huggingface.co/spaces/{os.getenv('SPACE_ID')}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></center>
455
+ ### <center>2. 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>
456
+ """
457
+ )
458
+ with gr.Tab("🐶 - Bark拟声"):
459
  with gr.Row():
460
  with gr.Column():
461
+ placeholder = "想让Bark说些什么呢?"
462
+ input_text = gr.Textbox(label="用作声音合成的文本", lines=4, placeholder=placeholder)
463
  with gr.Column():
464
  seedcomponent = gr.Number(label="Seed (default -1 = Random)", precision=0, value=-1)
465
  convert_to_ssml_button = gr.Button("Convert Text to SSML")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
 
467
  with gr.Row():
468
  with gr.Column():
469
+ gr.Markdown("查看Bark官方[语言库](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c)")
470
+ speaker = gr.Dropdown(speakers_list, value=speakers_list[0], label="中英双语的不同声音供您选择")
471
  with gr.Column():
472
+ text_temp = gr.Slider(0.1, 1.0, value=0.7, label="Generation Temperature", info="1.0 more diverse, 0.1 more conservative")
473
  waveform_temp = gr.Slider(0.1, 1.0, value=0.7, label="Waveform temperature", info="1.0 more diverse, 0.1 more conservative")
474
 
475
  with gr.Row():
476
  with gr.Column():
477
+ quick_gen_checkbox = gr.Checkbox(label="是否要快速合成语音", value=True)
478
  settings_checkboxes = ["Use last generation as history", "Save generation as Voice"]
479
  complete_settings = gr.CheckboxGroup(choices=settings_checkboxes, value=settings_checkboxes, label="Detailed Generation Settings", type="value", interactive=True, visible=False)
480
  with gr.Column():
 
482
 
483
  with gr.Row():
484
  with gr.Column():
485
+ tts_create_button = gr.Button("开始声音真实复刻吧")
486
  with gr.Column():
487
  hidden_checkbox = gr.Checkbox(visible=False)
488
+ button_stop_generation = gr.Button("停止生成")
489
  with gr.Row():
490
+ output_audio = gr.Audio(label="真实复刻的声音")
491
 
492
  with gr.Row():
493
+ inp1 = gr.Audio(label="请上传您喜欢的声音")
494
  inp2 = output_audio
495
  inp3 = output_audio
496
+ btn = gr.Button("开始生成专属声音吧")
497
+ out1 = gr.Audio(label="为你生成的专属声音")
498
  btn.click(voice_conversion, [inp1, inp2, inp3], [out1])
 
499
 
500
+ with gr.Row():
501
+ inp4 = out1
502
+ btn2 = gr.Button("对专属声音降噪吧")
503
+ out2 = gr.Audio(label="降噪后的专属声音")
504
+ btn2.click(speechbrain, [inp4], [out2])
505
+
506
+
507
 
508
+ with gr.Row():
509
+ with gr.Column():
510
+ examples = [
511
+ "Special meanings: [laughter] [laughs] [sighs] [music] [gasps] [clears throat] MAN: WOMAN:",
512
+ "♪ Never gonna make you cry, never gonna say goodbye, never gonna tell a lie and hurt you ♪",
513
+ "And now a picture of a larch [laughter]",
514
+ """
515
+ WOMAN: I would like an oatmilk latte please.
516
+ MAN: Wow, that's expensive!
517
+ """,
518
+ """<?xml version="1.0"?>
519
+ <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
520
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
521
+ xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
522
+ http://www.w3.org/TR/speech-synthesis/synthesis.xsd"
523
+ xml:lang="en-US">
524
+ <voice name="en_speaker_9">Look at that drunk guy!</voice>
525
+ <voice name="en_speaker_3">Who is he?</voice>
526
+ <voice name="en_speaker_9">WOMAN: [clears throat] 10 years ago, he proposed me and I rejected him.</voice>
527
+ <voice name="en_speaker_3">Oh my God [laughs] he is still celebrating</voice>
528
+ </speak>"""
529
+ ]
530
+ examples = gr.Examples(examples=examples, inputs=input_text)
531
 
532
+ with gr.Tab("🤖 - 设置"):
533
  with gr.Row():
534
  themes = gr.Dropdown(available_themes, label="Theme", info="Change needs complete restart", value=settings.selected_theme)
535
  with gr.Row():
 
548
  button_apply_restart = gr.Button("Restart Server")
549
  button_delete_files = gr.Button("Clear output folder")
550
 
551
+ gr.HTML('''
552
+ <div class="footer">
553
+ <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
554
+ </p>
555
+ </div>
556
+ ''')
557
+
558
  quick_gen_checkbox.change(fn=on_quick_gen_changed, inputs=quick_gen_checkbox, outputs=complete_settings)
559
  convert_to_ssml_button.click(convert_text_to_ssml, inputs=[input_text, speaker],outputs=input_text)
560
  gen_click = tts_create_button.click(generate_text_to_speech, inputs=[input_text, speaker, text_temp, waveform_temp, eos_prob, quick_gen_checkbox, complete_settings, seedcomponent],outputs=output_audio)