Eddycrack864 committed on
Commit
c276b83
1 Parent(s): 70da67e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +520 -525
app.py CHANGED
@@ -1,526 +1,521 @@
1
- import os
2
- import re
3
- import random
4
- from scipy.io.wavfile import write
5
- import gradio as gr
6
-
7
- roformer_models = {
8
- 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
9
- 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
10
- 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
11
- 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
12
- }
13
-
14
- mdx23c_models = [
15
- 'MDX23C_D1581.ckpt',
16
- 'MDX23C-8KFFT-InstVoc_HQ.ckpt',
17
- 'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
18
- ]
19
-
20
- mdxnet_models = [
21
- 'UVR-MDX-NET-Inst_full_292.onnx',
22
- 'UVR-MDX-NET_Inst_187_beta.onnx',
23
- 'UVR-MDX-NET_Inst_82_beta.onnx',
24
- 'UVR-MDX-NET_Inst_90_beta.onnx',
25
- 'UVR-MDX-NET_Main_340.onnx',
26
- 'UVR-MDX-NET_Main_390.onnx',
27
- 'UVR-MDX-NET_Main_406.onnx',
28
- 'UVR-MDX-NET_Main_427.onnx',
29
- 'UVR-MDX-NET_Main_438.onnx',
30
- 'UVR-MDX-NET-Inst_HQ_1.onnx',
31
- 'UVR-MDX-NET-Inst_HQ_2.onnx',
32
- 'UVR-MDX-NET-Inst_HQ_3.onnx',
33
- 'UVR-MDX-NET-Inst_HQ_4.onnx',
34
- 'UVR_MDXNET_Main.onnx',
35
- 'UVR-MDX-NET-Inst_Main.onnx',
36
- 'UVR_MDXNET_1_9703.onnx',
37
- 'UVR_MDXNET_2_9682.onnx',
38
- 'UVR_MDXNET_3_9662.onnx',
39
- 'UVR-MDX-NET-Inst_1.onnx',
40
- 'UVR-MDX-NET-Inst_2.onnx',
41
- 'UVR-MDX-NET-Inst_3.onnx',
42
- 'UVR_MDXNET_KARA.onnx',
43
- 'UVR_MDXNET_KARA_2.onnx',
44
- 'UVR_MDXNET_9482.onnx',
45
- 'UVR-MDX-NET-Voc_FT.onnx',
46
- 'Kim_Vocal_1.onnx',
47
- 'Kim_Vocal_2.onnx',
48
- 'Kim_Inst.onnx',
49
- 'Reverb_HQ_By_FoxJoy.onnx',
50
- 'UVR-MDX-NET_Crowd_HQ_1.onnx',
51
- 'kuielab_a_vocals.onnx',
52
- 'kuielab_a_other.onnx',
53
- 'kuielab_a_bass.onnx',
54
- 'kuielab_a_drums.onnx',
55
- 'kuielab_b_vocals.onnx',
56
- 'kuielab_b_other.onnx',
57
- 'kuielab_b_bass.onnx',
58
- 'kuielab_b_drums.onnx',
59
- ]
60
-
61
- vrarch_models = [
62
- '1_HP-UVR.pth',
63
- '2_HP-UVR.pth',
64
- '3_HP-Vocal-UVR.pth',
65
- '4_HP-Vocal-UVR.pth',
66
- '5_HP-Karaoke-UVR.pth',
67
- '6_HP-Karaoke-UVR.pth',
68
- '7_HP2-UVR.pth',
69
- '8_HP2-UVR.pth',
70
- '9_HP2-UVR.pth',
71
- '10_SP-UVR-2B-32000-1.pth',
72
- '11_SP-UVR-2B-32000-2.pth',
73
- '12_SP-UVR-3B-44100.pth',
74
- '13_SP-UVR-4B-44100-1.pth',
75
- '14_SP-UVR-4B-44100-2.pth',
76
- '15_SP-UVR-MID-44100-1.pth',
77
- '16_SP-UVR-MID-44100-2.pth',
78
- '17_HP-Wind_Inst-UVR.pth',
79
- 'UVR-De-Echo-Aggressive.pth',
80
- 'UVR-De-Echo-Normal.pth',
81
- 'UVR-DeEcho-DeReverb.pth',
82
- 'UVR-DeNoise-Lite.pth',
83
- 'UVR-DeNoise.pth',
84
- 'UVR-BVE-4B_SN-44100-1.pth',
85
- 'MGM_HIGHEND_v4.pth',
86
- 'MGM_LOWEND_A_v4.pth',
87
- 'MGM_LOWEND_B_v4.pth',
88
- 'MGM_MAIN_v4.pth',
89
- ]
90
-
91
- demucs_models = [
92
- 'htdemucs_ft.yaml',
93
- 'htdemucs.yaml',
94
- 'hdemucs_mmi.yaml',
95
- ]
96
-
97
- output_format = [
98
- 'wav',
99
- 'flac',
100
- 'mp3',
101
- ]
102
-
103
- mdxnet_overlap_values = [
104
- '0.25',
105
- '0.5',
106
- '0.75',
107
- '0.99',
108
- ]
109
-
110
- vrarch_window_size_values = [
111
- '320',
112
- '512',
113
- '1024',
114
- ]
115
-
116
- def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
117
- files_list = []
118
- files_list.clear()
119
- directory = "./outputs"
120
- random_id = str(random.randint(10000, 99999))
121
- pattern = f"{random_id}"
122
- os.makedirs("outputs", exist_ok=True)
123
- write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
124
- full_roformer_model = roformer_models[roformer_model]
125
- prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap}"
126
- os.system(prompt)
127
-
128
- for file in os.listdir(directory):
129
- if re.search(pattern, file):
130
- files_list.append(os.path.join(directory, file))
131
-
132
- stem1_file = files_list[0]
133
- stem2_file = files_list[1]
134
-
135
- return stem1_file, stem2_file
136
-
137
- def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
138
- files_list = []
139
- files_list.clear()
140
- directory = "./outputs"
141
- random_id = str(random.randint(10000, 99999))
142
- pattern = f"{random_id}"
143
- os.makedirs("outputs", exist_ok=True)
144
- write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
145
- prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
146
- os.system(prompt)
147
-
148
- for file in os.listdir(directory):
149
- if re.search(pattern, file):
150
- files_list.append(os.path.join(directory, file))
151
-
152
- stem1_file = files_list[0]
153
- stem2_file = files_list[1]
154
-
155
- return stem1_file, stem2_file
156
-
157
- def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
158
- files_list = []
159
- files_list.clear()
160
- directory = "./outputs"
161
- random_id = str(random.randint(10000, 99999))
162
- pattern = f"{random_id}"
163
- os.makedirs("outputs", exist_ok=True)
164
- write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
165
- prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdxc_segment_size={mdxnet_segment_size} --mdxc_overlap={mdxnet_overlap}"
166
-
167
- if mdxnet_denoise:
168
- prompt += " --mdx_enable_denoise"
169
-
170
- os.system(prompt)
171
-
172
- for file in os.listdir(directory):
173
- if re.search(pattern, file):
174
- files_list.append(os.path.join(directory, file))
175
-
176
- stem1_file = files_list[0]
177
- stem2_file = files_list[1]
178
-
179
- return stem1_file, stem2_file
180
-
181
- def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
182
- files_list = []
183
- files_list.clear()
184
- directory = "./outputs"
185
- random_id = str(random.randint(10000, 99999))
186
- pattern = f"{random_id}"
187
- os.makedirs("outputs", exist_ok=True)
188
- write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
189
- prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
190
-
191
- if vrarch_tta:
192
- prompt += " --vr_enable_tta"
193
- if vrarch_high_end_process:
194
- prompt += " --vr_high_end_process"
195
-
196
- os.system(prompt)
197
-
198
- for file in os.listdir(directory):
199
- if re.search(pattern, file):
200
- files_list.append(os.path.join(directory, file))
201
-
202
- stem1_file = files_list[0]
203
- stem2_file = files_list[1]
204
-
205
- return stem1_file, stem2_file
206
-
207
- def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
208
- files_list = []
209
- files_list.clear()
210
- directory = "./outputs"
211
- random_id = str(random.randint(10000, 99999))
212
- pattern = f"{random_id}"
213
- os.makedirs("outputs", exist_ok=True)
214
- write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
215
- prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
216
-
217
- os.system(prompt)
218
-
219
- for file in os.listdir(directory):
220
- if re.search(pattern, file):
221
- files_list.append(os.path.join(directory, file))
222
-
223
- stem1_file = files_list[0]
224
- stem2_file = files_list[1]
225
- stem3_file = files_list[2]
226
- stem4_file = files_list[3]
227
-
228
- return stem1_file, stem2_file, stem3_file, stem4_file
229
-
230
- with gr.Blocks(title="🎵 UVR5 UI 🎵") as app:
231
- gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
232
- with gr.Tabs():
233
- with gr.TabItem("BS/Mel Roformer"):
234
- with gr.Row():
235
- roformer_model = gr.Dropdown(
236
- label = "Select the Model",
237
- choices=list(roformer_models.keys()),
238
- interactive = True
239
- )
240
- roformer_output_format = gr.Dropdown(
241
- label = "Select the Output Format",
242
- choices = output_format,
243
- interactive = True
244
- )
245
- with gr.Row():
246
- roformer_overlap = gr.Slider(
247
- minimum = 2,
248
- maximum = 4,
249
- step = 1,
250
- label = "Overlap",
251
- info = "Amount of overlap between prediction windows.",
252
- value = 4,
253
- interactive = True
254
- )
255
- with gr.Row():
256
- roformer_audio = gr.Audio(
257
- label = "Input Audio",
258
- type = "numpy",
259
- interactive = True
260
- )
261
- with gr.Row():
262
- roformer_button = gr.Button("Separate!", variant = "primary")
263
- with gr.Row():
264
- roformer_stem1 = gr.Audio(
265
- show_download_button = True,
266
- interactive = False,
267
- label = "Stem 1",
268
- type = "filepath"
269
- )
270
- roformer_stem2 = gr.Audio(
271
- show_download_button = True,
272
- interactive = False,
273
- label = "Stem 2",
274
- type = "filepath"
275
- )
276
-
277
- roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap], [roformer_stem1, roformer_stem2])
278
-
279
- with gr.TabItem("MDX23C"):
280
- with gr.Row():
281
- mdx23c_model = gr.Dropdown(
282
- label = "Select the Model",
283
- choices = mdx23c_models,
284
- interactive = True
285
- )
286
- mdx23c_output_format = gr.Dropdown(
287
- label = "Select the Output Format",
288
- choices = output_format,
289
- interactive = True
290
- )
291
- with gr.Row():
292
- mdx23c_segment_size = gr.Slider(
293
- minimum = 32,
294
- maximum = 4000,
295
- step = 32,
296
- label = "Segment Size",
297
- info = "Larger consumes more resources, but may give better results.",
298
- value = 256,
299
- interactive = True
300
- )
301
- mdx23c_overlap = gr.Slider(
302
- minimum = 2,
303
- maximum = 50,
304
- step = 1,
305
- label = "Overlap",
306
- info = "Amount of overlap between prediction windows.",
307
- value = 8,
308
- interactive = True
309
- )
310
- with gr.Row():
311
- mdx23c_audio = gr.Audio(
312
- label = "Input Audio",
313
- type = "numpy",
314
- interactive = True
315
- )
316
- with gr.Row():
317
- mdx23c_button = gr.Button("Separate!", variant = "primary")
318
- with gr.Row():
319
- mdx23c_stem1 = gr.Audio(
320
- show_download_button = True,
321
- interactive = False,
322
- label = "Stem 1",
323
- type = "filepath"
324
- )
325
- mdx23c_stem2 = gr.Audio(
326
- show_download_button = True,
327
- interactive = False,
328
- label = "Stem 2",
329
- type = "filepath"
330
- )
331
-
332
- mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap], [mdx23c_stem1, mdx23c_stem2])
333
-
334
- with gr.TabItem("MDX-NET"):
335
- with gr.Row():
336
- mdxnet_model = gr.Dropdown(
337
- label = "Select the Model",
338
- choices = mdxnet_models,
339
- interactive = True
340
- )
341
- mdxnet_output_format = gr.Dropdown(
342
- label = "Select the Output Format",
343
- choices = output_format,
344
- interactive = True
345
- )
346
- with gr.Row():
347
- mdxnet_segment_size = gr.Slider(
348
- minimum = 32,
349
- maximum = 4000,
350
- step = 32,
351
- label = "Segment Size",
352
- info = "Larger consumes more resources, but may give better results.",
353
- value = 256,
354
- interactive = True
355
- )
356
- mdxnet_overlap = gr.Dropdown(
357
- label = "Overlap",
358
- choices = mdxnet_overlap_values,
359
- value = mdxnet_overlap_values[0],
360
- interactive = True
361
- )
362
- mdxnet_denoise = gr.Checkbox(
363
- label = "Denoise",
364
- info = "Enable denoising during separation.",
365
- value = True,
366
- interactive = True
367
- )
368
- with gr.Row():
369
- mdxnet_audio = gr.Audio(
370
- label = "Input Audio",
371
- type = "numpy",
372
- interactive = True
373
- )
374
- with gr.Row():
375
- mdxnet_button = gr.Button("Separate", variant = "primary")
376
- with gr.Row():
377
- mdxnet_stem1 = gr.Audio(
378
- show_download_button = True,
379
- interactive = False,
380
- label = "Stem 1",
381
- type = "filepath"
382
- )
383
- mdxnet_stem2 = gr.Audio(
384
- show_download_button = True,
385
- interactive = False,
386
- label = "Stem 2",
387
- type = "filepath"
388
- )
389
-
390
- mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
391
-
392
- with gr.TabItem("VR ARCH"):
393
- with gr.Row():
394
- vrarch_model = gr.Dropdown(
395
- label = "Select the Model",
396
- choices = vrarch_models,
397
- interactive = True
398
- )
399
- vrarch_output_format = gr.Dropdown(
400
- label = "Select the Output Format",
401
- choices = output_format,
402
- interactive = True
403
- )
404
- with gr.Row():
405
- vrarch_window_size = gr.Dropdown(
406
- label = "Window Size",
407
- choices = vrarch_window_size_values,
408
- value = vrarch_window_size_values[0],
409
- interactive = True
410
- )
411
- vrarch_agression = gr.Slider(
412
- minimum = 1,
413
- maximum = 50,
414
- step = 1,
415
- label = "Agression",
416
- info = "Intensity of primary stem extraction.",
417
- value = 5,
418
- interactive = True
419
- )
420
- vrarch_tta = gr.Checkbox(
421
- label = "TTA",
422
- info = "Enable Test-Time-Augmentation; slow but improves quality.",
423
- value = True,
424
- visible = True,
425
- interactive = True,
426
- )
427
- vrarch_high_end_process = gr.Checkbox(
428
- label = "High End Process",
429
- info = "Mirror the missing frequency range of the output.",
430
- value = False,
431
- visible = True,
432
- interactive = True,
433
- )
434
- with gr.Row():
435
- vrarch_audio = gr.Audio(
436
- label = "Input Audio",
437
- type = "numpy",
438
- interactive = True
439
- )
440
- with gr.Row():
441
- vrarch_button = gr.Button("Separate!", variant = "primary")
442
- with gr.Row():
443
- vrarch_stem1 = gr.Audio(
444
- show_download_button = True,
445
- interactive = False,
446
- type = "filepath",
447
- label = "Stem 1"
448
- )
449
- vrarch_stem2 = gr.Audio(
450
- show_download_button = True,
451
- interactive = False,
452
- type = "filepath",
453
- label = "Stem 2"
454
- )
455
-
456
- vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_stem1, vrarch_stem2])
457
-
458
- with gr.TabItem("Demucs"):
459
- with gr.Row():
460
- demucs_model = gr.Dropdown(
461
- label = "Select the Model",
462
- choices = demucs_models,
463
- interactive = True
464
- )
465
- demucs_output_format = gr.Dropdown(
466
- label = "Select the Output Format",
467
- choices = output_format,
468
- interactive = True
469
- )
470
- with gr.Row():
471
- demucs_shifts = gr.Slider(
472
- minimum = 1,
473
- maximum = 20,
474
- step = 1,
475
- label = "Shifts",
476
- info = "Number of predictions with random shifts, higher = slower but better quality.",
477
- value = 2,
478
- interactive = True
479
- )
480
- demucs_overlap = gr.Slider(
481
- minimum = 0.001,
482
- maximum = 0.999,
483
- step = 0.001,
484
- label = "Overlap",
485
- info = "Amount of overlap between prediction windows.",
486
- value = 0.025,
487
- interactive = True
488
- )
489
- with gr.Row():
490
- demucs_audio = gr.Audio(
491
- label = "Input Audio",
492
- type = "numpy",
493
- interactive = True
494
- )
495
- with gr.Row():
496
- demucs_button = gr.Button("Separate!", variant = "primary")
497
- with gr.Row():
498
- demucs_stem1 = gr.Audio(
499
- show_download_button = True,
500
- interactive = False,
501
- type = "filepath",
502
- label = "Stem 1"
503
- )
504
- demucs_stem2 = gr.Audio(
505
- show_download_button = True,
506
- interactive = False,
507
- type = "filepath",
508
- label = "Stem 2"
509
- )
510
- with gr.Row():
511
- demucs_stem3 = gr.Audio(
512
- show_download_button = True,
513
- interactive = False,
514
- type = "filepath",
515
- label = "Stem 3"
516
- )
517
- demucs_stem4 = gr.Audio(
518
- show_download_button = True,
519
- interactive = False,
520
- type = "filepath",
521
- label = "Stem 4"
522
- )
523
-
524
- demucs_button.click(vrarch_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
525
-
526
  app.launch()
 
1
+ import os
2
+ import re
3
+ import random
4
+ from scipy.io.wavfile import write
5
+ import gradio as gr
6
+
7
+ roformer_models = {
8
+ 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
9
+ 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
10
+ 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
11
+ 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
12
+ }
13
+
14
+ mdx23c_models = [
15
+ 'MDX23C_D1581.ckpt',
16
+ 'MDX23C-8KFFT-InstVoc_HQ.ckpt',
17
+ 'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
18
+ ]
19
+
20
+ mdxnet_models = [
21
+ 'UVR-MDX-NET-Inst_full_292.onnx',
22
+ 'UVR-MDX-NET_Inst_187_beta.onnx',
23
+ 'UVR-MDX-NET_Inst_82_beta.onnx',
24
+ 'UVR-MDX-NET_Inst_90_beta.onnx',
25
+ 'UVR-MDX-NET_Main_340.onnx',
26
+ 'UVR-MDX-NET_Main_390.onnx',
27
+ 'UVR-MDX-NET_Main_406.onnx',
28
+ 'UVR-MDX-NET_Main_427.onnx',
29
+ 'UVR-MDX-NET_Main_438.onnx',
30
+ 'UVR-MDX-NET-Inst_HQ_1.onnx',
31
+ 'UVR-MDX-NET-Inst_HQ_2.onnx',
32
+ 'UVR-MDX-NET-Inst_HQ_3.onnx',
33
+ 'UVR-MDX-NET-Inst_HQ_4.onnx',
34
+ 'UVR_MDXNET_Main.onnx',
35
+ 'UVR-MDX-NET-Inst_Main.onnx',
36
+ 'UVR_MDXNET_1_9703.onnx',
37
+ 'UVR_MDXNET_2_9682.onnx',
38
+ 'UVR_MDXNET_3_9662.onnx',
39
+ 'UVR-MDX-NET-Inst_1.onnx',
40
+ 'UVR-MDX-NET-Inst_2.onnx',
41
+ 'UVR-MDX-NET-Inst_3.onnx',
42
+ 'UVR_MDXNET_KARA.onnx',
43
+ 'UVR_MDXNET_KARA_2.onnx',
44
+ 'UVR_MDXNET_9482.onnx',
45
+ 'UVR-MDX-NET-Voc_FT.onnx',
46
+ 'Kim_Vocal_1.onnx',
47
+ 'Kim_Vocal_2.onnx',
48
+ 'Kim_Inst.onnx',
49
+ 'Reverb_HQ_By_FoxJoy.onnx',
50
+ 'UVR-MDX-NET_Crowd_HQ_1.onnx',
51
+ 'kuielab_a_vocals.onnx',
52
+ 'kuielab_a_other.onnx',
53
+ 'kuielab_a_bass.onnx',
54
+ 'kuielab_a_drums.onnx',
55
+ 'kuielab_b_vocals.onnx',
56
+ 'kuielab_b_other.onnx',
57
+ 'kuielab_b_bass.onnx',
58
+ 'kuielab_b_drums.onnx',
59
+ ]
60
+
61
+ vrarch_models = [
62
+ '1_HP-UVR.pth',
63
+ '2_HP-UVR.pth',
64
+ '3_HP-Vocal-UVR.pth',
65
+ '4_HP-Vocal-UVR.pth',
66
+ '5_HP-Karaoke-UVR.pth',
67
+ '6_HP-Karaoke-UVR.pth',
68
+ '7_HP2-UVR.pth',
69
+ '8_HP2-UVR.pth',
70
+ '9_HP2-UVR.pth',
71
+ '10_SP-UVR-2B-32000-1.pth',
72
+ '11_SP-UVR-2B-32000-2.pth',
73
+ '12_SP-UVR-3B-44100.pth',
74
+ '13_SP-UVR-4B-44100-1.pth',
75
+ '14_SP-UVR-4B-44100-2.pth',
76
+ '15_SP-UVR-MID-44100-1.pth',
77
+ '16_SP-UVR-MID-44100-2.pth',
78
+ '17_HP-Wind_Inst-UVR.pth',
79
+ 'UVR-De-Echo-Aggressive.pth',
80
+ 'UVR-De-Echo-Normal.pth',
81
+ 'UVR-DeEcho-DeReverb.pth',
82
+ 'UVR-DeNoise-Lite.pth',
83
+ 'UVR-DeNoise.pth',
84
+ 'UVR-BVE-4B_SN-44100-1.pth',
85
+ 'MGM_HIGHEND_v4.pth',
86
+ 'MGM_LOWEND_A_v4.pth',
87
+ 'MGM_LOWEND_B_v4.pth',
88
+ 'MGM_MAIN_v4.pth',
89
+ ]
90
+
91
+ demucs_models = [
92
+ 'htdemucs_ft.yaml',
93
+ 'htdemucs.yaml',
94
+ 'hdemucs_mmi.yaml',
95
+ ]
96
+
97
+ output_format = [
98
+ 'wav',
99
+ 'flac',
100
+ 'mp3',
101
+ ]
102
+
103
+ mdxnet_overlap_values = [
104
+ '0.25',
105
+ '0.5',
106
+ '0.75',
107
+ '0.99',
108
+ ]
109
+
110
+ vrarch_window_size_values = [
111
+ '320',
112
+ '512',
113
+ '1024',
114
+ ]
115
+
116
+ def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
117
+ files_list = []
118
+ files_list.clear()
119
+ directory = "./outputs"
120
+ random_id = str(random.randint(10000, 99999))
121
+ pattern = f"{random_id}"
122
+ os.makedirs("outputs", exist_ok=True)
123
+ write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
124
+ full_roformer_model = roformer_models[roformer_model]
125
+ prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap}"
126
+ os.system(prompt)
127
+
128
+ for file in os.listdir(directory):
129
+ if re.search(pattern, file):
130
+ files_list.append(os.path.join(directory, file))
131
+
132
+ stem1_file = files_list[0]
133
+ stem2_file = files_list[1]
134
+
135
+ return stem1_file, stem2_file
136
+
137
+ def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
138
+ files_list = []
139
+ files_list.clear()
140
+ directory = "./outputs"
141
+ random_id = str(random.randint(10000, 99999))
142
+ pattern = f"{random_id}"
143
+ os.makedirs("outputs", exist_ok=True)
144
+ write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
145
+ prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
146
+ os.system(prompt)
147
+
148
+ for file in os.listdir(directory):
149
+ if re.search(pattern, file):
150
+ files_list.append(os.path.join(directory, file))
151
+
152
+ stem1_file = files_list[0]
153
+ stem2_file = files_list[1]
154
+
155
+ return stem1_file, stem2_file
156
+
157
+ def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
158
+ files_list = []
159
+ files_list.clear()
160
+ directory = "./outputs"
161
+ random_id = str(random.randint(10000, 99999))
162
+ pattern = f"{random_id}"
163
+ os.makedirs("outputs", exist_ok=True)
164
+ write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
165
+ prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdxc_segment_size={mdxnet_segment_size} --mdxc_overlap={mdxnet_overlap}"
166
+
167
+ if mdxnet_denoise:
168
+ prompt += " --mdx_enable_denoise"
169
+
170
+ os.system(prompt)
171
+
172
+ for file in os.listdir(directory):
173
+ if re.search(pattern, file):
174
+ files_list.append(os.path.join(directory, file))
175
+
176
+ stem1_file = files_list[0]
177
+ stem2_file = files_list[1]
178
+
179
+ return stem1_file, stem2_file
180
+
181
+ def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression):
182
+ files_list = []
183
+ files_list.clear()
184
+ directory = "./outputs"
185
+ random_id = str(random.randint(10000, 99999))
186
+ pattern = f"{random_id}"
187
+ os.makedirs("outputs", exist_ok=True)
188
+ write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
189
+ prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
190
+
191
+ os.system(prompt)
192
+
193
+ for file in os.listdir(directory):
194
+ if re.search(pattern, file):
195
+ files_list.append(os.path.join(directory, file))
196
+
197
+ stem1_file = files_list[0]
198
+ stem2_file = files_list[1]
199
+
200
+ return stem1_file, stem2_file
201
+
202
+ def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
203
+ files_list = []
204
+ files_list.clear()
205
+ directory = "./outputs"
206
+ random_id = str(random.randint(10000, 99999))
207
+ pattern = f"{random_id}"
208
+ os.makedirs("outputs", exist_ok=True)
209
+ write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
210
+ prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
211
+
212
+ os.system(prompt)
213
+
214
+ for file in os.listdir(directory):
215
+ if re.search(pattern, file):
216
+ files_list.append(os.path.join(directory, file))
217
+
218
+ stem1_file = files_list[0]
219
+ stem2_file = files_list[1]
220
+ stem3_file = files_list[2]
221
+ stem4_file = files_list[3]
222
+
223
+ return stem1_file, stem2_file, stem3_file, stem4_file
224
+
225
+ with gr.Blocks(title="🎵 UVR5 UI 🎵") as app:
226
+ gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
227
+ with gr.Tabs():
228
+ with gr.TabItem("BS/Mel Roformer"):
229
+ with gr.Row():
230
+ roformer_model = gr.Dropdown(
231
+ label = "Select the Model",
232
+ choices=list(roformer_models.keys()),
233
+ interactive = True
234
+ )
235
+ roformer_output_format = gr.Dropdown(
236
+ label = "Select the Output Format",
237
+ choices = output_format,
238
+ interactive = True
239
+ )
240
+ with gr.Row():
241
+ roformer_overlap = gr.Slider(
242
+ minimum = 2,
243
+ maximum = 4,
244
+ step = 1,
245
+ label = "Overlap",
246
+ info = "Amount of overlap between prediction windows.",
247
+ value = 4,
248
+ interactive = True
249
+ )
250
+ with gr.Row():
251
+ roformer_audio = gr.Audio(
252
+ label = "Input Audio",
253
+ type = "numpy",
254
+ interactive = True
255
+ )
256
+ with gr.Row():
257
+ roformer_button = gr.Button("Separate!", variant = "primary")
258
+ with gr.Row():
259
+ roformer_stem1 = gr.Audio(
260
+ show_download_button = True,
261
+ interactive = False,
262
+ label = "Stem 1",
263
+ type = "filepath"
264
+ )
265
+ roformer_stem2 = gr.Audio(
266
+ show_download_button = True,
267
+ interactive = False,
268
+ label = "Stem 2",
269
+ type = "filepath"
270
+ )
271
+
272
+ roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap], [roformer_stem1, roformer_stem2])
273
+
274
+ with gr.TabItem("MDX23C"):
275
+ with gr.Row():
276
+ mdx23c_model = gr.Dropdown(
277
+ label = "Select the Model",
278
+ choices = mdx23c_models,
279
+ interactive = True
280
+ )
281
+ mdx23c_output_format = gr.Dropdown(
282
+ label = "Select the Output Format",
283
+ choices = output_format,
284
+ interactive = True
285
+ )
286
+ with gr.Row():
287
+ mdx23c_segment_size = gr.Slider(
288
+ minimum = 32,
289
+ maximum = 4000,
290
+ step = 32,
291
+ label = "Segment Size",
292
+ info = "Larger consumes more resources, but may give better results.",
293
+ value = 256,
294
+ interactive = True
295
+ )
296
+ mdx23c_overlap = gr.Slider(
297
+ minimum = 2,
298
+ maximum = 50,
299
+ step = 1,
300
+ label = "Overlap",
301
+ info = "Amount of overlap between prediction windows.",
302
+ value = 8,
303
+ interactive = True
304
+ )
305
+ with gr.Row():
306
+ mdx23c_audio = gr.Audio(
307
+ label = "Input Audio",
308
+ type = "numpy",
309
+ interactive = True
310
+ )
311
+ with gr.Row():
312
+ mdx23c_button = gr.Button("Separate!", variant = "primary")
313
+ with gr.Row():
314
+ mdx23c_stem1 = gr.Audio(
315
+ show_download_button = True,
316
+ interactive = False,
317
+ label = "Stem 1",
318
+ type = "filepath"
319
+ )
320
+ mdx23c_stem2 = gr.Audio(
321
+ show_download_button = True,
322
+ interactive = False,
323
+ label = "Stem 2",
324
+ type = "filepath"
325
+ )
326
+
327
+ mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap], [mdx23c_stem1, mdx23c_stem2])
328
+
329
+ with gr.TabItem("MDX-NET"):
330
+ with gr.Row():
331
+ mdxnet_model = gr.Dropdown(
332
+ label = "Select the Model",
333
+ choices = mdxnet_models,
334
+ interactive = True
335
+ )
336
+ mdxnet_output_format = gr.Dropdown(
337
+ label = "Select the Output Format",
338
+ choices = output_format,
339
+ interactive = True
340
+ )
341
+ with gr.Row():
342
+ mdxnet_segment_size = gr.Slider(
343
+ minimum = 32,
344
+ maximum = 4000,
345
+ step = 32,
346
+ label = "Segment Size",
347
+ info = "Larger consumes more resources, but may give better results.",
348
+ value = 256,
349
+ interactive = True
350
+ )
351
+ mdxnet_overlap = gr.Dropdown(
352
+ label = "Overlap",
353
+ choices = mdxnet_overlap_values,
354
+ value = mdxnet_overlap_values[0],
355
+ interactive = True
356
+ )
357
+ mdxnet_denoise = gr.Checkbox(
358
+ label = "Denoise",
359
+ info = "Enable denoising during separation.",
360
+ value = True,
361
+ interactive = True
362
+ )
363
+ with gr.Row():
364
+ mdxnet_audio = gr.Audio(
365
+ label = "Input Audio",
366
+ type = "numpy",
367
+ interactive = True
368
+ )
369
+ with gr.Row():
370
+ mdxnet_button = gr.Button("Separate", variant = "primary")
371
+ with gr.Row():
372
+ mdxnet_stem1 = gr.Audio(
373
+ show_download_button = True,
374
+ interactive = False,
375
+ label = "Stem 1",
376
+ type = "filepath"
377
+ )
378
+ mdxnet_stem2 = gr.Audio(
379
+ show_download_button = True,
380
+ interactive = False,
381
+ label = "Stem 2",
382
+ type = "filepath"
383
+ )
384
+
385
+ mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
386
+
387
+ with gr.TabItem("VR ARCH"):
388
+ with gr.Row():
389
+ vrarch_model = gr.Dropdown(
390
+ label = "Select the Model",
391
+ choices = vrarch_models,
392
+ interactive = True
393
+ )
394
+ vrarch_output_format = gr.Dropdown(
395
+ label = "Select the Output Format",
396
+ choices = output_format,
397
+ interactive = True
398
+ )
399
+ with gr.Row():
400
+ vrarch_window_size = gr.Dropdown(
401
+ label = "Window Size",
402
+ choices = vrarch_window_size_values,
403
+ value = vrarch_window_size_values[0],
404
+ interactive = True
405
+ )
406
+ vrarch_agression = gr.Slider(
407
+ minimum = 1,
408
+ maximum = 50,
409
+ step = 1,
410
+ label = "Agression",
411
+ info = "Intensity of primary stem extraction.",
412
+ value = 5,
413
+ interactive = True
414
+ )
415
+ vrarch_tta = gr.Checkbox(
416
+ label = "TTA",
417
+ info = "Enable Test-Time-Augmentation; slow but improves quality.",
418
+ value = True,
419
+ visible = True,
420
+ interactive = True,
421
+ )
422
+ vrarch_high_end_process = gr.Checkbox(
423
+ label = "High End Process",
424
+ info = "Mirror the missing frequency range of the output.",
425
+ value = False,
426
+ visible = True,
427
+ interactive = True,
428
+ )
429
+ with gr.Row():
430
+ vrarch_audio = gr.Audio(
431
+ label = "Input Audio",
432
+ type = "numpy",
433
+ interactive = True
434
+ )
435
+ with gr.Row():
436
+ vrarch_button = gr.Button("Separate!", variant = "primary")
437
+ with gr.Row():
438
+ vrarch_stem1 = gr.Audio(
439
+ show_download_button = True,
440
+ interactive = False,
441
+ type = "filepath",
442
+ label = "Stem 1"
443
+ )
444
+ vrarch_stem2 = gr.Audio(
445
+ show_download_button = True,
446
+ interactive = False,
447
+ type = "filepath",
448
+ label = "Stem 2"
449
+ )
450
+
451
+ vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression], [vrarch_stem1, vrarch_stem2])
452
+
453
+ with gr.TabItem("Demucs"):
454
+ with gr.Row():
455
+ demucs_model = gr.Dropdown(
456
+ label = "Select the Model",
457
+ choices = demucs_models,
458
+ interactive = True
459
+ )
460
+ demucs_output_format = gr.Dropdown(
461
+ label = "Select the Output Format",
462
+ choices = output_format,
463
+ interactive = True
464
+ )
465
+ with gr.Row():
466
+ demucs_shifts = gr.Slider(
467
+ minimum = 1,
468
+ maximum = 20,
469
+ step = 1,
470
+ label = "Shifts",
471
+ info = "Number of predictions with random shifts, higher = slower but better quality.",
472
+ value = 2,
473
+ interactive = True
474
+ )
475
+ demucs_overlap = gr.Slider(
476
+ minimum = 0.001,
477
+ maximum = 0.999,
478
+ step = 0.001,
479
+ label = "Overlap",
480
+ info = "Amount of overlap between prediction windows.",
481
+ value = 0.025,
482
+ interactive = True
483
+ )
484
+ with gr.Row():
485
+ demucs_audio = gr.Audio(
486
+ label = "Input Audio",
487
+ type = "numpy",
488
+ interactive = True
489
+ )
490
+ with gr.Row():
491
+ demucs_button = gr.Button("Separate!", variant = "primary")
492
+ with gr.Row():
493
+ demucs_stem1 = gr.Audio(
494
+ show_download_button = True,
495
+ interactive = False,
496
+ type = "filepath",
497
+ label = "Stem 1"
498
+ )
499
+ demucs_stem2 = gr.Audio(
500
+ show_download_button = True,
501
+ interactive = False,
502
+ type = "filepath",
503
+ label = "Stem 2"
504
+ )
505
+ with gr.Row():
506
+ demucs_stem3 = gr.Audio(
507
+ show_download_button = True,
508
+ interactive = False,
509
+ type = "filepath",
510
+ label = "Stem 3"
511
+ )
512
+ demucs_stem4 = gr.Audio(
513
+ show_download_button = True,
514
+ interactive = False,
515
+ type = "filepath",
516
+ label = "Stem 4"
517
+ )
518
+
519
+ demucs_button.click(vrarch_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
520
+
 
 
 
 
 
521
  app.launch()