fffiloni committed on
Commit 5cbba27
1 Parent(s): 7f61595

add audio cleaning + record helpers

Files changed (1)
  1. app.py +128 -32
app.py CHANGED
@@ -6,6 +6,7 @@ import shutil
 #from huggingface_hub import snapshot_download
 import numpy as np
 from scipy.io import wavfile
+from scipy.io.wavfile import write, read
 from pydub import AudioSegment
 
 file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
@@ -22,23 +23,6 @@ with open("characters.json", "r") as file:
     for item in data
 ]
 
-"""
-model_ids = [
-    'suno/bark',
-]
-
-for model_id in model_ids:
-    model_name = model_id.split('/')[-1]
-    snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
-
-from TTS.tts.configs.bark_config import BarkConfig
-from TTS.tts.models.bark import Bark
-
-#os.environ['CUDA_VISIBLE_DEVICES'] = '1'
-config = BarkConfig()
-model = Bark.init_from_config(config)
-model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
-"""
 from TTS.api import TTS
 tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
 
@@ -66,6 +50,66 @@ def cut_wav(input_path, max_duration):
 
     return output_path
 
+def load_hidden(audio_in):
+    return audio_in
+
+def load_hidden_mic(audio_in):
+    print("MICRO IN HAS CHANGED")
+
+    library_path = 'bark_voices'
+    folder_name = 'audio-0-100'
+    second_folder_name = 'audio-0-100_cleaned'
+
+    folder_path = os.path.join(library_path, folder_name)
+    second_folder_path = os.path.join(library_path, second_folder_name)
+
+    if os.path.exists(folder_path):
+        try:
+            shutil.rmtree(folder_path)
+            print(f"Successfully deleted the folder: {folder_path}")
+        except OSError as e:
+            print(f"Error: {folder_path} - {e.strerror}")
+    else:
+        print(f"The folder does not exist: {folder_path}")
+
+    if os.path.exists(second_folder_path):
+        try:
+            shutil.rmtree(second_folder_path)
+            print(f"Successfully deleted the folder: {second_folder_path}")
+        except OSError as e:
+            print(f"Error: {second_folder_path} - {e.strerror}")
+    else:
+        print(f"The folder does not exist: {second_folder_path}")
+
+    return audio_in
+
+def clear_clean_ckeck():
+    return False
+
+def wipe_npz_file(folder_path):
+    if os.path.exists(folder_path):
+        #shutil.rmtree(folder_path)
+        print(folder_path)
+    else :
+        print("path does not exists yet")
+    print("YO")
+
+def split_process(audio, chosen_out_track):
+    os.makedirs("out", exist_ok=True)
+    write('test.wav', audio[0], audio[1])
+    os.system("python3 -m demucs.separate -n mdx_extra_q -j 4 test.wav -o out")
+    #return "./out/mdx_extra_q/test/vocals.wav","./out/mdx_extra_q/test/bass.wav","./out/mdx_extra_q/test/drums.wav","./out/mdx_extra_q/test/other.wav"
+    if chosen_out_track == "vocals":
+        return "./out/mdx_extra_q/test/vocals.wav"
+    elif chosen_out_track == "bass":
+        return "./out/mdx_extra_q/test/bass.wav"
+    elif chosen_out_track == "drums":
+        return "./out/mdx_extra_q/test/drums.wav"
+    elif chosen_out_track == "other":
+        return "./out/mdx_extra_q/test/other.wav"
+    elif chosen_out_track == "all-in":
+        return "test.wav"
+
 def update_selection(selected_state: gr.SelectData):
     c_image = characters[selected_state.index]["image"]
     c_title = characters[selected_state.index]["title"]
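For context, `split_process` above shells out to Demucs (the `mdx_extra_q` model) and returns one separated stem. Below is a minimal usage sketch, not part of the commit, assuming Demucs and SciPy are installed and using the `examples/en_speaker_6.wav` sample that the app ships for its gr.Examples:

```python
# Illustrative only: exercising split_process() as defined in app.py.
from scipy.io.wavfile import read

rate, samples = read("examples/en_speaker_6.wav")        # load the reference sample as (rate, ndarray)
vocals_path = split_process((rate, samples), "vocals")   # writes test.wav, runs demucs, returns the vocals stem
print(vocals_path)                                       # ./out/mdx_extra_q/test/vocals.wav
```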
@@ -74,10 +118,24 @@ def update_selection(selected_state: gr.SelectData):
     return c_title, selected_state
 
 
-def infer(prompt, input_wav_file):
-
-    # Path to your WAV file
-    source_path = input_wav_file
+def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
+
+    if clean_audio is True :
+        # Extract the file name without the extension
+        new_name = os.path.splitext(os.path.basename(input_wav_file))[0]
+        check_name = os.path.join("bark_voices", f"{new_name}_cleaned")
+        if os.path.exists(check_name):
+            source_path = os.path.join(check_name, f"{new_name}_cleaned.wav")
+        else:
+            source_path = split_process(hidden_numpy_audio, "vocals")
+
+            # Rename the file
+            new_path = os.path.join(os.path.dirname(source_path), f"{new_name}_cleaned.wav")
+            os.rename(source_path, new_path)
+            source_path = new_path
+    else :
+        # Path to your WAV file
+        source_path = input_wav_file
 
     # Destination directory
     destination_directory = "bark_voices"
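`infer` now takes a `clean_audio` flag plus the hidden numpy copy of the sample: when cleaning is requested, the Demucs vocals stem (or a previously cached `*_cleaned.wav`) replaces the raw recording as the voice prompt. A rough, illustrative sketch of the two call paths, assuming the example wav bundled with the Space and the app's own globals:

```python
# Illustrative only: how the submit handlers below end up calling infer().
from scipy.io.wavfile import read

rate, samples = read("examples/en_speaker_6.wav")   # the (rate, ndarray) tuple Gradio stores in the hidden Audio

# clean_audio=False: the uploaded/recorded file is copied into bark_voices/ as-is.
infer("Hello world.", "examples/en_speaker_6.wav", False, None)

# clean_audio=True: split_process() extracts the vocals stem (or a cached *_cleaned.wav is reused)
# and that cleaned file becomes the voice prompt instead of the raw recording.
infer("Hello world.", "examples/en_speaker_6.wav", True, (rate, samples))
```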
@@ -108,7 +166,7 @@ def infer(prompt, input_wav_file):
 
     tts_video = gr.make_waveform(audio="output.wav")
 
-    return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True)
+    return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
 
 def infer_from_c(prompt, c_name):
 
@@ -265,18 +323,38 @@ with gr.Blocks(css=css) as demo:
                     source="upload",
                     interactive = False
                 )
-
+                clean_sample = gr.Checkbox(label="Clean sample ?", value=False)
+                hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
                 submit_btn = gr.Button("Submit")
-
+
             with gr.Tab("Microphone"):
+                texts_samples = gr.Textbox(label = "Helpers",
+                                           info = "You can read out loud one of these sentences if you do not know what to record :)",
+                                           value = """"Jazz, a quirky mix of groovy saxophones and wailing trumpets, echoes through the vibrant city streets."
+                                           "A majestic orchestra plays enchanting melodies, filling the air with harmony."
+                                           "The exquisite aroma of freshly baked bread wafts from a cozy bakery, enticing passersby."
+                                           "A thunderous roar shakes the ground as a massive jet takes off into the sky, leaving trails of white behind."
+                                           "Laughter erupts from a park where children play, their innocent voices rising like tinkling bells."
+                                           "Waves crash on the beach, and seagulls caw as they soar overhead, a symphony of nature's sounds."
+                                           "In the distance, a blacksmith hammers red-hot metal, the rhythmic clang punctuating the day."
+                                           "As evening falls, a soft hush blankets the world, crickets chirping in a soothing rhythm."
+                                           """,
+                                           interactive = False,
+                                           lines = 4
+                )
                 micro_in = gr.Audio(
                     label="Record voice to clone",
                     type="filepath",
                     source="microphone",
                     interactive = True
                 )
+                clean_micro = gr.Checkbox(label="Clean sample ?", value=False)
                 micro_submit_btn = gr.Button("Submit")
-
+
+            audio_in.upload(fn=load_hidden, inputs=[audio_in], outputs=[hidden_audio_numpy])
+            micro_in.stop_recording(fn=load_hidden_mic, inputs=[micro_in], outputs=[hidden_audio_numpy])
+
+
             with gr.Tab("Voices Characters"):
                 selected_state = gr.State()
                 gallery_in = gr.Gallery(
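The two event hooks above copy the uploaded or recorded sample into a hidden `type="numpy"` Audio component, so the cleaning step can receive the raw waveform without re-reading the file. A stripped-down, standalone sketch of that pattern (Gradio 3.x style, matching the `source=` arguments used in this app; not part of the commit):

```python
# Standalone sketch of the hidden-Audio pattern used in app.py.
import gradio as gr

def load_hidden(audio_in):
    # Returning the filepath is enough: when the hidden component is later read as an input,
    # Gradio delivers it as a (sample_rate, np.ndarray) tuple because of type="numpy".
    return audio_in

with gr.Blocks() as sketch:
    audio_in = gr.Audio(label="Voice to clone", type="filepath", source="upload")
    hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
    audio_in.upload(fn=load_hidden, inputs=[audio_in], outputs=[hidden_audio_numpy])

sketch.launch()
```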
@@ -308,6 +386,10 @@ with gr.Blocks(css=css) as demo:
                 visible = False
             )
 
+            folder_path = gr.Textbox(visible=False)
+
+
+
             character_name = gr.Textbox(
                 label="Character Name",
                 placeholder="Name that voice character",
@@ -334,29 +416,37 @@ with gr.Blocks(css=css) as demo:
                 show_progress=False,
             )
 
-
+            audio_in.change(fn=wipe_npz_file, inputs=[folder_path])
+            micro_in.clear(fn=wipe_npz_file, inputs=[folder_path])
 
             gr.Examples(
                 examples = [
                     [
                         "Once upon a time, in a cozy little shell, lived a friendly crab named Crabby. Crabby loved his cozy home, but he always felt like something was missing.",
                         "./examples/en_speaker_6.wav",
+                        False,
+                        None
                     ],
                     [
                         "It was a typical afternoon in the bustling city, the sun shining brightly through the windows of the packed courtroom. Three people sat at the bar, their faces etched with worry and anxiety. ",
                         "./examples/en_speaker_9.wav",
+                        False,
+                        None
                     ],
                 ],
                 fn = infer,
                 inputs = [
                     prompt,
-                    audio_in
+                    audio_in,
+                    clean_sample,
+                    hidden_audio_numpy
                 ],
                 outputs = [
                     cloned_out,
                     video_out,
                     npz_file,
-                    share_group
+                    share_group,
+                    folder_path
                 ],
                 cache_examples = False
             )
@@ -381,13 +471,16 @@ with gr.Blocks(css=css) as demo:
         fn = infer,
         inputs = [
             prompt,
-            audio_in
+            audio_in,
+            clean_sample,
+            hidden_audio_numpy
         ],
         outputs = [
             cloned_out,
             video_out,
             npz_file,
-            share_group
+            share_group,
+            folder_path
         ]
     )
 
@@ -395,13 +488,16 @@ with gr.Blocks(css=css) as demo:
         fn = infer,
         inputs = [
             prompt,
-            micro_in
+            micro_in,
+            clean_micro,
+            hidden_audio_numpy
         ],
         outputs = [
             cloned_out,
             video_out,
             npz_file,
-            share_group
+            share_group,
+            folder_path
         ]
     )
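The new hidden `folder_path` Textbox works like the hidden Audio component: `infer` now returns `destination_path` into it, and the `change`/`clear` hooks read it back so stale speaker files can be located and wiped. A standalone sketch of that round trip, not part of the commit, with a hypothetical path standing in for `destination_path`:

```python
# Standalone sketch of the hidden-Textbox round trip used for folder_path in app.py.
import os
import gradio as gr

def remember_folder():
    return "bark_voices/example_speaker"   # hypothetical, stands in for infer()'s destination_path

def wipe_stale(folder_path):
    # Mirrors wipe_npz_file(): it only reports for now, since the deletion is left commented out in app.py.
    print(folder_path if os.path.exists(folder_path) else "path does not exist yet")

with gr.Blocks() as sketch:
    audio_in = gr.Audio(label="Voice to clone", type="filepath", source="upload")
    folder_path = gr.Textbox(visible=False)
    submit_btn = gr.Button("Submit")

    submit_btn.click(fn=remember_folder, outputs=[folder_path])   # store the path after a run
    audio_in.change(fn=wipe_stale, inputs=[folder_path])          # reuse it when the sample changes

sketch.launch()
```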
 
 