Eddycrack864 committed on
Commit
9f00d15
1 Parent(s): c276b83

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +525 -520
app.py CHANGED
@@ -1,521 +1,526 @@
1
- import os
2
- import re
3
- import random
4
- from scipy.io.wavfile import write
5
- import gradio as gr
6
-
7
- roformer_models = {
8
- 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
9
- 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
10
- 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
11
- 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
12
- }
13
-
14
- mdx23c_models = [
15
- 'MDX23C_D1581.ckpt',
16
- 'MDX23C-8KFFT-InstVoc_HQ.ckpt',
17
- 'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
18
- ]
19
-
20
- mdxnet_models = [
21
- 'UVR-MDX-NET-Inst_full_292.onnx',
22
- 'UVR-MDX-NET_Inst_187_beta.onnx',
23
- 'UVR-MDX-NET_Inst_82_beta.onnx',
24
- 'UVR-MDX-NET_Inst_90_beta.onnx',
25
- 'UVR-MDX-NET_Main_340.onnx',
26
- 'UVR-MDX-NET_Main_390.onnx',
27
- 'UVR-MDX-NET_Main_406.onnx',
28
- 'UVR-MDX-NET_Main_427.onnx',
29
- 'UVR-MDX-NET_Main_438.onnx',
30
- 'UVR-MDX-NET-Inst_HQ_1.onnx',
31
- 'UVR-MDX-NET-Inst_HQ_2.onnx',
32
- 'UVR-MDX-NET-Inst_HQ_3.onnx',
33
- 'UVR-MDX-NET-Inst_HQ_4.onnx',
34
- 'UVR_MDXNET_Main.onnx',
35
- 'UVR-MDX-NET-Inst_Main.onnx',
36
- 'UVR_MDXNET_1_9703.onnx',
37
- 'UVR_MDXNET_2_9682.onnx',
38
- 'UVR_MDXNET_3_9662.onnx',
39
- 'UVR-MDX-NET-Inst_1.onnx',
40
- 'UVR-MDX-NET-Inst_2.onnx',
41
- 'UVR-MDX-NET-Inst_3.onnx',
42
- 'UVR_MDXNET_KARA.onnx',
43
- 'UVR_MDXNET_KARA_2.onnx',
44
- 'UVR_MDXNET_9482.onnx',
45
- 'UVR-MDX-NET-Voc_FT.onnx',
46
- 'Kim_Vocal_1.onnx',
47
- 'Kim_Vocal_2.onnx',
48
- 'Kim_Inst.onnx',
49
- 'Reverb_HQ_By_FoxJoy.onnx',
50
- 'UVR-MDX-NET_Crowd_HQ_1.onnx',
51
- 'kuielab_a_vocals.onnx',
52
- 'kuielab_a_other.onnx',
53
- 'kuielab_a_bass.onnx',
54
- 'kuielab_a_drums.onnx',
55
- 'kuielab_b_vocals.onnx',
56
- 'kuielab_b_other.onnx',
57
- 'kuielab_b_bass.onnx',
58
- 'kuielab_b_drums.onnx',
59
- ]
60
-
61
- vrarch_models = [
62
- '1_HP-UVR.pth',
63
- '2_HP-UVR.pth',
64
- '3_HP-Vocal-UVR.pth',
65
- '4_HP-Vocal-UVR.pth',
66
- '5_HP-Karaoke-UVR.pth',
67
- '6_HP-Karaoke-UVR.pth',
68
- '7_HP2-UVR.pth',
69
- '8_HP2-UVR.pth',
70
- '9_HP2-UVR.pth',
71
- '10_SP-UVR-2B-32000-1.pth',
72
- '11_SP-UVR-2B-32000-2.pth',
73
- '12_SP-UVR-3B-44100.pth',
74
- '13_SP-UVR-4B-44100-1.pth',
75
- '14_SP-UVR-4B-44100-2.pth',
76
- '15_SP-UVR-MID-44100-1.pth',
77
- '16_SP-UVR-MID-44100-2.pth',
78
- '17_HP-Wind_Inst-UVR.pth',
79
- 'UVR-De-Echo-Aggressive.pth',
80
- 'UVR-De-Echo-Normal.pth',
81
- 'UVR-DeEcho-DeReverb.pth',
82
- 'UVR-DeNoise-Lite.pth',
83
- 'UVR-DeNoise.pth',
84
- 'UVR-BVE-4B_SN-44100-1.pth',
85
- 'MGM_HIGHEND_v4.pth',
86
- 'MGM_LOWEND_A_v4.pth',
87
- 'MGM_LOWEND_B_v4.pth',
88
- 'MGM_MAIN_v4.pth',
89
- ]
90
-
91
- demucs_models = [
92
- 'htdemucs_ft.yaml',
93
- 'htdemucs.yaml',
94
- 'hdemucs_mmi.yaml',
95
- ]
96
-
97
- output_format = [
98
- 'wav',
99
- 'flac',
100
- 'mp3',
101
- ]
102
-
103
- mdxnet_overlap_values = [
104
- '0.25',
105
- '0.5',
106
- '0.75',
107
- '0.99',
108
- ]
109
-
110
- vrarch_window_size_values = [
111
- '320',
112
- '512',
113
- '1024',
114
- ]
115
-
116
- def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
117
- files_list = []
118
- files_list.clear()
119
- directory = "./outputs"
120
- random_id = str(random.randint(10000, 99999))
121
- pattern = f"{random_id}"
122
- os.makedirs("outputs", exist_ok=True)
123
- write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
124
- full_roformer_model = roformer_models[roformer_model]
125
- prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap}"
126
- os.system(prompt)
127
-
128
- for file in os.listdir(directory):
129
- if re.search(pattern, file):
130
- files_list.append(os.path.join(directory, file))
131
-
132
- stem1_file = files_list[0]
133
- stem2_file = files_list[1]
134
-
135
- return stem1_file, stem2_file
136
-
137
- def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
138
- files_list = []
139
- files_list.clear()
140
- directory = "./outputs"
141
- random_id = str(random.randint(10000, 99999))
142
- pattern = f"{random_id}"
143
- os.makedirs("outputs", exist_ok=True)
144
- write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
145
- prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
146
- os.system(prompt)
147
-
148
- for file in os.listdir(directory):
149
- if re.search(pattern, file):
150
- files_list.append(os.path.join(directory, file))
151
-
152
- stem1_file = files_list[0]
153
- stem2_file = files_list[1]
154
-
155
- return stem1_file, stem2_file
156
-
157
- def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
158
- files_list = []
159
- files_list.clear()
160
- directory = "./outputs"
161
- random_id = str(random.randint(10000, 99999))
162
- pattern = f"{random_id}"
163
- os.makedirs("outputs", exist_ok=True)
164
- write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
165
- prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdxc_segment_size={mdxnet_segment_size} --mdxc_overlap={mdxnet_overlap}"
166
-
167
- if mdxnet_denoise:
168
- prompt += " --mdx_enable_denoise"
169
-
170
- os.system(prompt)
171
-
172
- for file in os.listdir(directory):
173
- if re.search(pattern, file):
174
- files_list.append(os.path.join(directory, file))
175
-
176
- stem1_file = files_list[0]
177
- stem2_file = files_list[1]
178
-
179
- return stem1_file, stem2_file
180
-
181
- def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression):
182
- files_list = []
183
- files_list.clear()
184
- directory = "./outputs"
185
- random_id = str(random.randint(10000, 99999))
186
- pattern = f"{random_id}"
187
- os.makedirs("outputs", exist_ok=True)
188
- write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
189
- prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
190
-
191
- os.system(prompt)
192
-
193
- for file in os.listdir(directory):
194
- if re.search(pattern, file):
195
- files_list.append(os.path.join(directory, file))
196
-
197
- stem1_file = files_list[0]
198
- stem2_file = files_list[1]
199
-
200
- return stem1_file, stem2_file
201
-
202
- def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
203
- files_list = []
204
- files_list.clear()
205
- directory = "./outputs"
206
- random_id = str(random.randint(10000, 99999))
207
- pattern = f"{random_id}"
208
- os.makedirs("outputs", exist_ok=True)
209
- write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
210
- prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
211
-
212
- os.system(prompt)
213
-
214
- for file in os.listdir(directory):
215
- if re.search(pattern, file):
216
- files_list.append(os.path.join(directory, file))
217
-
218
- stem1_file = files_list[0]
219
- stem2_file = files_list[1]
220
- stem3_file = files_list[2]
221
- stem4_file = files_list[3]
222
-
223
- return stem1_file, stem2_file, stem3_file, stem4_file
224
-
225
- with gr.Blocks(title="🎵 UVR5 UI 🎵") as app:
226
- gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
227
- with gr.Tabs():
228
- with gr.TabItem("BS/Mel Roformer"):
229
- with gr.Row():
230
- roformer_model = gr.Dropdown(
231
- label = "Select the Model",
232
- choices=list(roformer_models.keys()),
233
- interactive = True
234
- )
235
- roformer_output_format = gr.Dropdown(
236
- label = "Select the Output Format",
237
- choices = output_format,
238
- interactive = True
239
- )
240
- with gr.Row():
241
- roformer_overlap = gr.Slider(
242
- minimum = 2,
243
- maximum = 4,
244
- step = 1,
245
- label = "Overlap",
246
- info = "Amount of overlap between prediction windows.",
247
- value = 4,
248
- interactive = True
249
- )
250
- with gr.Row():
251
- roformer_audio = gr.Audio(
252
- label = "Input Audio",
253
- type = "numpy",
254
- interactive = True
255
- )
256
- with gr.Row():
257
- roformer_button = gr.Button("Separate!", variant = "primary")
258
- with gr.Row():
259
- roformer_stem1 = gr.Audio(
260
- show_download_button = True,
261
- interactive = False,
262
- label = "Stem 1",
263
- type = "filepath"
264
- )
265
- roformer_stem2 = gr.Audio(
266
- show_download_button = True,
267
- interactive = False,
268
- label = "Stem 2",
269
- type = "filepath"
270
- )
271
-
272
- roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap], [roformer_stem1, roformer_stem2])
273
-
274
- with gr.TabItem("MDX23C"):
275
- with gr.Row():
276
- mdx23c_model = gr.Dropdown(
277
- label = "Select the Model",
278
- choices = mdx23c_models,
279
- interactive = True
280
- )
281
- mdx23c_output_format = gr.Dropdown(
282
- label = "Select the Output Format",
283
- choices = output_format,
284
- interactive = True
285
- )
286
- with gr.Row():
287
- mdx23c_segment_size = gr.Slider(
288
- minimum = 32,
289
- maximum = 4000,
290
- step = 32,
291
- label = "Segment Size",
292
- info = "Larger consumes more resources, but may give better results.",
293
- value = 256,
294
- interactive = True
295
- )
296
- mdx23c_overlap = gr.Slider(
297
- minimum = 2,
298
- maximum = 50,
299
- step = 1,
300
- label = "Overlap",
301
- info = "Amount of overlap between prediction windows.",
302
- value = 8,
303
- interactive = True
304
- )
305
- with gr.Row():
306
- mdx23c_audio = gr.Audio(
307
- label = "Input Audio",
308
- type = "numpy",
309
- interactive = True
310
- )
311
- with gr.Row():
312
- mdx23c_button = gr.Button("Separate!", variant = "primary")
313
- with gr.Row():
314
- mdx23c_stem1 = gr.Audio(
315
- show_download_button = True,
316
- interactive = False,
317
- label = "Stem 1",
318
- type = "filepath"
319
- )
320
- mdx23c_stem2 = gr.Audio(
321
- show_download_button = True,
322
- interactive = False,
323
- label = "Stem 2",
324
- type = "filepath"
325
- )
326
-
327
- mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap], [mdx23c_stem1, mdx23c_stem2])
328
-
329
- with gr.TabItem("MDX-NET"):
330
- with gr.Row():
331
- mdxnet_model = gr.Dropdown(
332
- label = "Select the Model",
333
- choices = mdxnet_models,
334
- interactive = True
335
- )
336
- mdxnet_output_format = gr.Dropdown(
337
- label = "Select the Output Format",
338
- choices = output_format,
339
- interactive = True
340
- )
341
- with gr.Row():
342
- mdxnet_segment_size = gr.Slider(
343
- minimum = 32,
344
- maximum = 4000,
345
- step = 32,
346
- label = "Segment Size",
347
- info = "Larger consumes more resources, but may give better results.",
348
- value = 256,
349
- interactive = True
350
- )
351
- mdxnet_overlap = gr.Dropdown(
352
- label = "Overlap",
353
- choices = mdxnet_overlap_values,
354
- value = mdxnet_overlap_values[0],
355
- interactive = True
356
- )
357
- mdxnet_denoise = gr.Checkbox(
358
- label = "Denoise",
359
- info = "Enable denoising during separation.",
360
- value = True,
361
- interactive = True
362
- )
363
- with gr.Row():
364
- mdxnet_audio = gr.Audio(
365
- label = "Input Audio",
366
- type = "numpy",
367
- interactive = True
368
- )
369
- with gr.Row():
370
- mdxnet_button = gr.Button("Separate", variant = "primary")
371
- with gr.Row():
372
- mdxnet_stem1 = gr.Audio(
373
- show_download_button = True,
374
- interactive = False,
375
- label = "Stem 1",
376
- type = "filepath"
377
- )
378
- mdxnet_stem2 = gr.Audio(
379
- show_download_button = True,
380
- interactive = False,
381
- label = "Stem 2",
382
- type = "filepath"
383
- )
384
-
385
- mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
386
-
387
- with gr.TabItem("VR ARCH"):
388
- with gr.Row():
389
- vrarch_model = gr.Dropdown(
390
- label = "Select the Model",
391
- choices = vrarch_models,
392
- interactive = True
393
- )
394
- vrarch_output_format = gr.Dropdown(
395
- label = "Select the Output Format",
396
- choices = output_format,
397
- interactive = True
398
- )
399
- with gr.Row():
400
- vrarch_window_size = gr.Dropdown(
401
- label = "Window Size",
402
- choices = vrarch_window_size_values,
403
- value = vrarch_window_size_values[0],
404
- interactive = True
405
- )
406
- vrarch_agression = gr.Slider(
407
- minimum = 1,
408
- maximum = 50,
409
- step = 1,
410
- label = "Agression",
411
- info = "Intensity of primary stem extraction.",
412
- value = 5,
413
- interactive = True
414
- )
415
- vrarch_tta = gr.Checkbox(
416
- label = "TTA",
417
- info = "Enable Test-Time-Augmentation; slow but improves quality.",
418
- value = True,
419
- visible = True,
420
- interactive = True,
421
- )
422
- vrarch_high_end_process = gr.Checkbox(
423
- label = "High End Process",
424
- info = "Mirror the missing frequency range of the output.",
425
- value = False,
426
- visible = True,
427
- interactive = True,
428
- )
429
- with gr.Row():
430
- vrarch_audio = gr.Audio(
431
- label = "Input Audio",
432
- type = "numpy",
433
- interactive = True
434
- )
435
- with gr.Row():
436
- vrarch_button = gr.Button("Separate!", variant = "primary")
437
- with gr.Row():
438
- vrarch_stem1 = gr.Audio(
439
- show_download_button = True,
440
- interactive = False,
441
- type = "filepath",
442
- label = "Stem 1"
443
- )
444
- vrarch_stem2 = gr.Audio(
445
- show_download_button = True,
446
- interactive = False,
447
- type = "filepath",
448
- label = "Stem 2"
449
- )
450
-
451
- vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression], [vrarch_stem1, vrarch_stem2])
452
-
453
- with gr.TabItem("Demucs"):
454
- with gr.Row():
455
- demucs_model = gr.Dropdown(
456
- label = "Select the Model",
457
- choices = demucs_models,
458
- interactive = True
459
- )
460
- demucs_output_format = gr.Dropdown(
461
- label = "Select the Output Format",
462
- choices = output_format,
463
- interactive = True
464
- )
465
- with gr.Row():
466
- demucs_shifts = gr.Slider(
467
- minimum = 1,
468
- maximum = 20,
469
- step = 1,
470
- label = "Shifts",
471
- info = "Number of predictions with random shifts, higher = slower but better quality.",
472
- value = 2,
473
- interactive = True
474
- )
475
- demucs_overlap = gr.Slider(
476
- minimum = 0.001,
477
- maximum = 0.999,
478
- step = 0.001,
479
- label = "Overlap",
480
- info = "Amount of overlap between prediction windows.",
481
- value = 0.025,
482
- interactive = True
483
- )
484
- with gr.Row():
485
- demucs_audio = gr.Audio(
486
- label = "Input Audio",
487
- type = "numpy",
488
- interactive = True
489
- )
490
- with gr.Row():
491
- demucs_button = gr.Button("Separate!", variant = "primary")
492
- with gr.Row():
493
- demucs_stem1 = gr.Audio(
494
- show_download_button = True,
495
- interactive = False,
496
- type = "filepath",
497
- label = "Stem 1"
498
- )
499
- demucs_stem2 = gr.Audio(
500
- show_download_button = True,
501
- interactive = False,
502
- type = "filepath",
503
- label = "Stem 2"
504
- )
505
- with gr.Row():
506
- demucs_stem3 = gr.Audio(
507
- show_download_button = True,
508
- interactive = False,
509
- type = "filepath",
510
- label = "Stem 3"
511
- )
512
- demucs_stem4 = gr.Audio(
513
- show_download_button = True,
514
- interactive = False,
515
- type = "filepath",
516
- label = "Stem 4"
517
- )
518
-
519
- demucs_button.click(vrarch_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
520
-
 
 
 
 
 
521
  app.launch()
 
1
+ import os
2
+ import re
3
+ import random
4
+ from scipy.io.wavfile import write
5
+ import gradio as gr
6
+
7
# Display name shown in the UI -> checkpoint filename handed to audio-separator.
roformer_models = {
    "BS-Roformer-Viperx-1297.ckpt": "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
    "BS-Roformer-Viperx-1296.ckpt": "model_bs_roformer_ep_368_sdr_12.9628.ckpt",
    "BS-Roformer-Viperx-1053.ckpt": "model_bs_roformer_ep_937_sdr_10.5309.ckpt",
    "Mel-Roformer-Viperx-1143.ckpt": "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
}

# Model filenames offered in the "MDX23C" tab.
mdx23c_models = [
    "MDX23C_D1581.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ_2.ckpt",
]

# Model filenames offered in the "MDX-NET" tab.
mdxnet_models = [
    "UVR-MDX-NET-Inst_full_292.onnx",
    "UVR-MDX-NET_Inst_187_beta.onnx",
    "UVR-MDX-NET_Inst_82_beta.onnx",
    "UVR-MDX-NET_Inst_90_beta.onnx",
    "UVR-MDX-NET_Main_340.onnx",
    "UVR-MDX-NET_Main_390.onnx",
    "UVR-MDX-NET_Main_406.onnx",
    "UVR-MDX-NET_Main_427.onnx",
    "UVR-MDX-NET_Main_438.onnx",
    "UVR-MDX-NET-Inst_HQ_1.onnx",
    "UVR-MDX-NET-Inst_HQ_2.onnx",
    "UVR-MDX-NET-Inst_HQ_3.onnx",
    "UVR-MDX-NET-Inst_HQ_4.onnx",
    "UVR_MDXNET_Main.onnx",
    "UVR-MDX-NET-Inst_Main.onnx",
    "UVR_MDXNET_1_9703.onnx",
    "UVR_MDXNET_2_9682.onnx",
    "UVR_MDXNET_3_9662.onnx",
    "UVR-MDX-NET-Inst_1.onnx",
    "UVR-MDX-NET-Inst_2.onnx",
    "UVR-MDX-NET-Inst_3.onnx",
    "UVR_MDXNET_KARA.onnx",
    "UVR_MDXNET_KARA_2.onnx",
    "UVR_MDXNET_9482.onnx",
    "UVR-MDX-NET-Voc_FT.onnx",
    "Kim_Vocal_1.onnx",
    "Kim_Vocal_2.onnx",
    "Kim_Inst.onnx",
    "Reverb_HQ_By_FoxJoy.onnx",
    "UVR-MDX-NET_Crowd_HQ_1.onnx",
    "kuielab_a_vocals.onnx",
    "kuielab_a_other.onnx",
    "kuielab_a_bass.onnx",
    "kuielab_a_drums.onnx",
    "kuielab_b_vocals.onnx",
    "kuielab_b_other.onnx",
    "kuielab_b_bass.onnx",
    "kuielab_b_drums.onnx",
]

# Model filenames offered in the "VR ARCH" tab.
vrarch_models = [
    "1_HP-UVR.pth",
    "2_HP-UVR.pth",
    "3_HP-Vocal-UVR.pth",
    "4_HP-Vocal-UVR.pth",
    "5_HP-Karaoke-UVR.pth",
    "6_HP-Karaoke-UVR.pth",
    "7_HP2-UVR.pth",
    "8_HP2-UVR.pth",
    "9_HP2-UVR.pth",
    "10_SP-UVR-2B-32000-1.pth",
    "11_SP-UVR-2B-32000-2.pth",
    "12_SP-UVR-3B-44100.pth",
    "13_SP-UVR-4B-44100-1.pth",
    "14_SP-UVR-4B-44100-2.pth",
    "15_SP-UVR-MID-44100-1.pth",
    "16_SP-UVR-MID-44100-2.pth",
    "17_HP-Wind_Inst-UVR.pth",
    "UVR-De-Echo-Aggressive.pth",
    "UVR-De-Echo-Normal.pth",
    "UVR-DeEcho-DeReverb.pth",
    "UVR-DeNoise-Lite.pth",
    "UVR-DeNoise.pth",
    "UVR-BVE-4B_SN-44100-1.pth",
    "MGM_HIGHEND_v4.pth",
    "MGM_LOWEND_A_v4.pth",
    "MGM_LOWEND_B_v4.pth",
    "MGM_MAIN_v4.pth",
]

# Model config names offered in the "Demucs" tab.
demucs_models = [
    "htdemucs_ft.yaml",
    "htdemucs.yaml",
    "hdemucs_mmi.yaml",
]

# Choices for every "Select the Output Format" dropdown.
output_format = [
    "wav",
    "flac",
    "mp3",
]

# Choices for the MDX-NET "Overlap" dropdown (kept as strings, as in the UI).
mdxnet_overlap_values = [
    "0.25",
    "0.5",
    "0.75",
    "0.99",
]

# Choices for the VR ARCH "Window Size" dropdown (kept as strings, as in the UI).
vrarch_window_size_values = [
    "320",
    "512",
    "1024",
]
115
+
116
def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
    """Split an uploaded clip into two stems with a BS/Mel Roformer checkpoint.

    The clip is written to a uniquely named WAV file, ``audio-separator`` is
    run on it, and the files in ``./outputs`` whose names contain the run id
    are returned.

    Args:
        roformer_audio: Pair whose first item is passed to scipy's ``write``
            as the sample rate and whose second item is passed as the data.
        roformer_model: Display name; must be a key of ``roformer_models``.
        roformer_output_format: Value for ``--output_format``.
        roformer_overlap: Value for ``--mdxc_overlap``.

    Returns:
        Paths of the first two output files of this run, in sorted order.

    Raises:
        ValueError: If no audio was supplied.
        KeyError: If ``roformer_model`` is not a key of ``roformer_models``.
        subprocess.CalledProcessError: If ``audio-separator`` exits non-zero.
        RuntimeError: If the run produced fewer than two output files.
    """
    import subprocess
    import uuid

    if roformer_audio is None:
        raise ValueError("No input audio was provided.")

    output_dir = "./outputs"
    # A UUID cannot collide with earlier runs or with digits inside model
    # names, unlike a 5-digit random number matched anywhere in the filename.
    run_id = uuid.uuid4().hex
    input_path = f"{run_id}.wav"
    os.makedirs(output_dir, exist_ok=True)
    write(input_path, roformer_audio[0], roformer_audio[1])

    # Argument list (no shell): UI-supplied values are never shell-interpreted.
    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        roformer_models[roformer_model],
        f"--output_dir={output_dir}",
        f"--output_format={roformer_output_format}",
        "--normalization=0.9",
        f"--mdxc_overlap={roformer_overlap}",
    ]
    try:
        subprocess.run(command, check=True)
    finally:
        # The temporary input is only needed while the separator runs.
        os.remove(input_path)

    # Sorted so the stem -> player assignment does not depend on listdir order.
    stems = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if run_id in name
    )
    if len(stems) < 2:
        raise RuntimeError(
            f"audio-separator produced {len(stems)} output file(s); expected 2."
        )
    return stems[0], stems[1]
136
+
137
def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
    """Split an uploaded clip into two stems with an MDX23C model.

    The clip is written to a uniquely named WAV file, ``audio-separator`` is
    run on it, and the files in ``./outputs`` whose names contain the run id
    are returned.

    Args:
        mdx23c_audio: Pair whose first item is passed to scipy's ``write`` as
            the sample rate and whose second item is passed as the data.
        mdx23c_model: Value for ``--model_filename``.
        mdx23c_output_format: Value for ``--output_format``.
        mdx23c_segment_size: Value for ``--mdxc_segment_size``.
        mdx23c_overlap: Value for ``--mdxc_overlap``.

    Returns:
        Paths of the first two output files of this run, in sorted order.

    Raises:
        ValueError: If no audio was supplied.
        subprocess.CalledProcessError: If ``audio-separator`` exits non-zero.
        RuntimeError: If the run produced fewer than two output files.
    """
    import subprocess
    import uuid

    if mdx23c_audio is None:
        raise ValueError("No input audio was provided.")

    output_dir = "./outputs"
    # A UUID cannot collide with earlier runs or with digits inside model
    # names, unlike a 5-digit random number matched anywhere in the filename.
    run_id = uuid.uuid4().hex
    input_path = f"{run_id}.wav"
    os.makedirs(output_dir, exist_ok=True)
    write(input_path, mdx23c_audio[0], mdx23c_audio[1])

    # Argument list (no shell): UI-supplied values are never shell-interpreted.
    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        str(mdx23c_model),
        f"--output_dir={output_dir}",
        f"--output_format={mdx23c_output_format}",
        "--normalization=0.9",
        f"--mdxc_segment_size={mdx23c_segment_size}",
        f"--mdxc_overlap={mdx23c_overlap}",
    ]
    try:
        subprocess.run(command, check=True)
    finally:
        # The temporary input is only needed while the separator runs.
        os.remove(input_path)

    # Sorted so the stem -> player assignment does not depend on listdir order.
    stems = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if run_id in name
    )
    if len(stems) < 2:
        raise RuntimeError(
            f"audio-separator produced {len(stems)} output file(s); expected 2."
        )
    return stems[0], stems[1]
156
+
157
def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
    """Split an uploaded clip into two stems with an MDX-NET model.

    The clip is written to a uniquely named WAV file, ``audio-separator`` is
    run on it, and the files in ``./outputs`` whose names contain the run id
    are returned.

    Args:
        mdxnet_audio: Pair whose first item is passed to scipy's ``write`` as
            the sample rate and whose second item is passed as the data.
        mdxnet_model: Value for ``--model_filename``.
        mdxnet_output_format: Value for ``--output_format``.
        mdxnet_segment_size: Value for ``--mdx_segment_size``.
        mdxnet_overlap: Value for ``--mdx_overlap``.
        mdxnet_denoise: When truthy, ``--mdx_enable_denoise`` is appended.

    Returns:
        Paths of the first two output files of this run, in sorted order.

    Raises:
        ValueError: If no audio was supplied.
        subprocess.CalledProcessError: If ``audio-separator`` exits non-zero.
        RuntimeError: If the run produced fewer than two output files.
    """
    import subprocess
    import uuid

    if mdxnet_audio is None:
        raise ValueError("No input audio was provided.")

    output_dir = "./outputs"
    # A UUID cannot collide with earlier runs or with digits inside model
    # names, unlike a 5-digit random number matched anywhere in the filename.
    run_id = uuid.uuid4().hex
    input_path = f"{run_id}.wav"
    os.makedirs(output_dir, exist_ok=True)
    write(input_path, mdxnet_audio[0], mdxnet_audio[1])

    # Argument list (no shell): UI-supplied values are never shell-interpreted.
    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        str(mdxnet_model),
        f"--output_dir={output_dir}",
        f"--output_format={mdxnet_output_format}",
        "--normalization=0.9",
        f"--mdx_segment_size={mdxnet_segment_size}",
        f"--mdx_overlap={mdxnet_overlap}",
    ]
    if mdxnet_denoise:
        command.append("--mdx_enable_denoise")

    try:
        subprocess.run(command, check=True)
    finally:
        # The temporary input is only needed while the separator runs.
        os.remove(input_path)

    # Sorted so the stem -> player assignment does not depend on listdir order.
    stems = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if run_id in name
    )
    if len(stems) < 2:
        raise RuntimeError(
            f"audio-separator produced {len(stems)} output file(s); expected 2."
        )
    return stems[0], stems[1]
180
+
181
def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
    """Split an uploaded clip into two stems with a VR-architecture model.

    The clip is written to a uniquely named WAV file, ``audio-separator`` is
    run on it, and the files in ``./outputs`` whose names contain the run id
    are returned.

    Args:
        vrarch_audio: Pair whose first item is passed to scipy's ``write`` as
            the sample rate and whose second item is passed as the data.
        vrarch_model: Value for ``--model_filename``.
        vrarch_output_format: Value for ``--output_format``.
        vrarch_window_size: Value for ``--vr_window_size``.
        vrarch_agression: Value for ``--vr_aggression``.
        vrarch_tta: When truthy, ``--vr_enable_tta`` is appended.
        vrarch_high_end_process: When truthy, ``--vr_high_end_process`` is
            appended.

    Returns:
        Paths of the first two output files of this run, in sorted order.

    Raises:
        ValueError: If no audio was supplied.
        subprocess.CalledProcessError: If ``audio-separator`` exits non-zero.
        RuntimeError: If the run produced fewer than two output files.
    """
    import subprocess
    import uuid

    if vrarch_audio is None:
        raise ValueError("No input audio was provided.")

    output_dir = "./outputs"
    # A UUID cannot collide with earlier runs or with digits inside model
    # names (e.g. "32000", "44100"), unlike a 5-digit random number matched
    # anywhere in the filename.
    run_id = uuid.uuid4().hex
    input_path = f"{run_id}.wav"
    os.makedirs(output_dir, exist_ok=True)
    write(input_path, vrarch_audio[0], vrarch_audio[1])

    # Argument list (no shell): UI-supplied values are never shell-interpreted.
    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        str(vrarch_model),
        f"--output_dir={output_dir}",
        f"--output_format={vrarch_output_format}",
        "--normalization=0.9",
        f"--vr_window_size={vrarch_window_size}",
        f"--vr_aggression={vrarch_agression}",
    ]
    if vrarch_tta:
        command.append("--vr_enable_tta")
    if vrarch_high_end_process:
        command.append("--vr_high_end_process")

    try:
        subprocess.run(command, check=True)
    finally:
        # The temporary input is only needed while the separator runs.
        os.remove(input_path)

    # Sorted so the stem -> player assignment does not depend on listdir order.
    stems = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if run_id in name
    )
    if len(stems) < 2:
        raise RuntimeError(
            f"audio-separator produced {len(stems)} output file(s); expected 2."
        )
    return stems[0], stems[1]
206
+
207
def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
    """Split an uploaded clip into four stems with a Demucs model.

    The clip is written to a uniquely named WAV file, ``audio-separator`` is
    run on it, and the files in ``./outputs`` whose names contain the run id
    are returned.

    Args:
        demucs_audio: Pair whose first item is passed to scipy's ``write`` as
            the sample rate and whose second item is passed as the data.
        demucs_model: Value for ``--model_filename``.
        demucs_output_format: Value for ``--output_format``.
        demucs_shifts: Value for ``--demucs_shifts``.
        demucs_overlap: Value for ``--demucs_overlap``.

    Returns:
        Paths of the first four output files of this run, in sorted order.

    Raises:
        ValueError: If no audio was supplied.
        subprocess.CalledProcessError: If ``audio-separator`` exits non-zero.
        RuntimeError: If the run produced fewer than four output files.
    """
    import subprocess
    import uuid

    if demucs_audio is None:
        raise ValueError("No input audio was provided.")

    output_dir = "./outputs"
    # A UUID cannot collide with earlier runs or with digits inside model
    # names, unlike a 5-digit random number matched anywhere in the filename.
    run_id = uuid.uuid4().hex
    input_path = f"{run_id}.wav"
    os.makedirs(output_dir, exist_ok=True)
    write(input_path, demucs_audio[0], demucs_audio[1])

    # Argument list (no shell): UI-supplied values are never shell-interpreted.
    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        str(demucs_model),
        f"--output_dir={output_dir}",
        f"--output_format={demucs_output_format}",
        "--normalization=0.9",
        f"--demucs_shifts={demucs_shifts}",
        f"--demucs_overlap={demucs_overlap}",
    ]
    try:
        subprocess.run(command, check=True)
    finally:
        # The temporary input is only needed while the separator runs.
        os.remove(input_path)

    # Sorted so the stem -> player assignment does not depend on listdir order.
    stems = sorted(
        os.path.join(output_dir, name)
        for name in os.listdir(output_dir)
        if run_id in name
    )
    if len(stems) < 4:
        raise RuntimeError(
            f"audio-separator produced {len(stems)} output file(s); expected 4."
        )
    return stems[0], stems[1], stems[2], stems[3]
229
+
230
def _model_dropdown(choices):
    """Create a model selector with the first choice pre-selected, so handlers never receive None."""
    return gr.Dropdown(
        label="Select the Model",
        choices=choices,
        value=choices[0],
        interactive=True,
    )


def _format_dropdown():
    """Create an output-format selector with the first format pre-selected."""
    return gr.Dropdown(
        label="Select the Output Format",
        choices=output_format,
        value=output_format[0],
        interactive=True,
    )


def _audio_input():
    """Create the upload widget; type="numpy" means handlers receive the clip as in-memory data."""
    return gr.Audio(label="Input Audio", type="numpy", interactive=True)


def _stem_player(label):
    """Create a read-only, downloadable player fed with a file path returned by a handler."""
    return gr.Audio(
        show_download_button=True,
        interactive=False,
        label=label,
        type="filepath",
    )


# One tab per model family. Every tab has the same layout: model/format
# selectors, family-specific options, the input clip, a button, and one
# player per stem the handler returns.
with gr.Blocks(title="🎵 UVR5 UI 🎵") as app:
    gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
    with gr.Tabs():
        with gr.TabItem("BS/Mel Roformer"):
            with gr.Row():
                roformer_model = _model_dropdown(list(roformer_models.keys()))
                roformer_output_format = _format_dropdown()
            with gr.Row():
                roformer_overlap = gr.Slider(
                    minimum=2,
                    maximum=4,
                    step=1,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=4,
                    interactive=True,
                )
            with gr.Row():
                roformer_audio = _audio_input()
            with gr.Row():
                roformer_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                roformer_stem1 = _stem_player("Stem 1")
                roformer_stem2 = _stem_player("Stem 2")

            roformer_button.click(
                roformer_separator,
                [roformer_audio, roformer_model, roformer_output_format, roformer_overlap],
                [roformer_stem1, roformer_stem2],
            )

        with gr.TabItem("MDX23C"):
            with gr.Row():
                mdx23c_model = _model_dropdown(mdx23c_models)
                mdx23c_output_format = _format_dropdown()
            with gr.Row():
                mdx23c_segment_size = gr.Slider(
                    minimum=32,
                    maximum=4000,
                    step=32,
                    label="Segment Size",
                    info="Larger consumes more resources, but may give better results.",
                    value=256,
                    interactive=True,
                )
                mdx23c_overlap = gr.Slider(
                    minimum=2,
                    maximum=50,
                    step=1,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=8,
                    interactive=True,
                )
            with gr.Row():
                mdx23c_audio = _audio_input()
            with gr.Row():
                mdx23c_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                mdx23c_stem1 = _stem_player("Stem 1")
                mdx23c_stem2 = _stem_player("Stem 2")

            mdx23c_button.click(
                mdxc_separator,
                [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap],
                [mdx23c_stem1, mdx23c_stem2],
            )

        with gr.TabItem("MDX-NET"):
            with gr.Row():
                mdxnet_model = _model_dropdown(mdxnet_models)
                mdxnet_output_format = _format_dropdown()
            with gr.Row():
                mdxnet_segment_size = gr.Slider(
                    minimum=32,
                    maximum=4000,
                    step=32,
                    label="Segment Size",
                    info="Larger consumes more resources, but may give better results.",
                    value=256,
                    interactive=True,
                )
                mdxnet_overlap = gr.Dropdown(
                    label="Overlap",
                    choices=mdxnet_overlap_values,
                    value=mdxnet_overlap_values[0],
                    interactive=True,
                )
                mdxnet_denoise = gr.Checkbox(
                    label="Denoise",
                    info="Enable denoising during separation.",
                    value=True,
                    interactive=True,
                )
            with gr.Row():
                mdxnet_audio = _audio_input()
            with gr.Row():
                # Label aligned with the other tabs ("Separate!").
                mdxnet_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                mdxnet_stem1 = _stem_player("Stem 1")
                mdxnet_stem2 = _stem_player("Stem 2")

            mdxnet_button.click(
                mdxnet_separator,
                [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise],
                [mdxnet_stem1, mdxnet_stem2],
            )

        with gr.TabItem("VR ARCH"):
            with gr.Row():
                vrarch_model = _model_dropdown(vrarch_models)
                vrarch_output_format = _format_dropdown()
            with gr.Row():
                vrarch_window_size = gr.Dropdown(
                    label="Window Size",
                    choices=vrarch_window_size_values,
                    value=vrarch_window_size_values[0],
                    interactive=True,
                )
                vrarch_agression = gr.Slider(
                    minimum=1,
                    maximum=50,
                    step=1,
                    label="Aggression",
                    info="Intensity of primary stem extraction.",
                    value=5,
                    interactive=True,
                )
                vrarch_tta = gr.Checkbox(
                    label="TTA",
                    info="Enable Test-Time-Augmentation; slow but improves quality.",
                    value=True,
                    visible=True,
                    interactive=True,
                )
                vrarch_high_end_process = gr.Checkbox(
                    label="High End Process",
                    info="Mirror the missing frequency range of the output.",
                    value=False,
                    visible=True,
                    interactive=True,
                )
            with gr.Row():
                vrarch_audio = _audio_input()
            with gr.Row():
                vrarch_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                vrarch_stem1 = _stem_player("Stem 1")
                vrarch_stem2 = _stem_player("Stem 2")

            vrarch_button.click(
                vrarch_separator,
                [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process],
                [vrarch_stem1, vrarch_stem2],
            )

        with gr.TabItem("Demucs"):
            with gr.Row():
                demucs_model = _model_dropdown(demucs_models)
                demucs_output_format = _format_dropdown()
            with gr.Row():
                demucs_shifts = gr.Slider(
                    minimum=1,
                    maximum=20,
                    step=1,
                    label="Shifts",
                    info="Number of predictions with random shifts, higher = slower but better quality.",
                    value=2,
                    interactive=True,
                )
                demucs_overlap = gr.Slider(
                    minimum=0.001,
                    maximum=0.999,
                    step=0.001,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=0.025,
                    interactive=True,
                )
            with gr.Row():
                demucs_audio = _audio_input()
            with gr.Row():
                demucs_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                demucs_stem1 = _stem_player("Stem 1")
                demucs_stem2 = _stem_player("Stem 2")
            with gr.Row():
                demucs_stem3 = _stem_player("Stem 3")
                demucs_stem4 = _stem_player("Stem 4")

            # Must be demucs_separator: it takes these five inputs and returns
            # the four paths the four players expect. vrarch_separator takes
            # seven arguments and returns only two paths.
            demucs_button.click(
                demucs_separator,
                [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap],
                [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4],
            )
525
+
526
  # Launch the Gradio interface bound to `app` by the `with gr.Blocks(...)` block.
  app.launch()