Hev832 committed on
Commit
eaf41c5
1 Parent(s): 00e0077

Delete lib/main.py

Browse files
Files changed (1) hide show
  1. lib/main.py +0 -312
lib/main.py DELETED
@@ -1,312 +0,0 @@
1
- import argparse
2
- import gc
3
- import hashlib
4
- import json
5
- import os
6
- import shlex
7
- import subprocess
8
- from contextlib import suppress
9
- from urllib.parse import urlparse, parse_qs
10
-
11
- import gradio as gr
12
- import librosa
13
- import numpy as np
14
- import soundfile as sf
15
- import sox
16
- import yt_dlp
17
- from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
18
- from pedalboard.io import AudioFile
19
- from pydub import AudioSegment
20
-
21
- from mdx import run_mdx
22
- from infer_libs.rvc.py import Config, load_hubert, get_vc, rvc_infer
23
-
24
# Absolute directory that contains this script; every other path hangs off it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Sub-directories (relative to BASE_DIR) for the MDX-Net separation models,
# the RVC voice-model assets and the generated song output.
mdxnet_models_dir, rvc_models_dir, output_dir = (
    os.path.join(BASE_DIR, subdir)
    for subdir in ('mdxnet_models', 'assets', 'song_output')
)
29
-
30
-
31
def get_youtube_video_id(url, ignore_playlist=True):
    """
    Extracts the video ID from a YouTube URL.

    Args:
        url: URL string to inspect.
        ignore_playlist: When False and a youtube.com-style URL carries a
            ``list`` query parameter, that playlist ID is returned instead
            of the video ID.

    Returns:
        The ID as a string, or None when the host is not a recognised
        YouTube host or the URL carries no ID.
    """
    query = urlparse(url)
    params = parse_qs(query.query)

    if query.hostname == 'youtu.be':
        if query.path[1:] == 'watch':
            # Look the parameter up by name rather than slicing the raw
            # query string, so extra parameters (e.g. "&t=5") are not
            # glued onto the ID.
            return params.get('v', [None])[0]
        return query.path[1:] or None

    if query.hostname in {'www.youtube.com', 'youtube.com', 'music.youtube.com'}:
        if not ignore_playlist and 'list' in params:
            return params['list'][0]
        if query.path == '/watch':
            # A missing "v" parameter means "no ID", not a KeyError.
            return params.get('v', [None])[0]
        # "/<kind>/<id>" splits into ['', kind, id, ...]; the ID is always
        # the third element, including for "/watch/<id>".
        for prefix in ('/watch/', '/embed/', '/v/', '/shorts/'):
            if query.path.startswith(prefix):
                return query.path.split('/')[2] or None

    return None
57
-
58
-
59
-
60
def yt_download(link):
    """
    Downloads the best audio format from a YouTube link.

    The audio is extracted to MP3 via the FFmpegExtractAudio post-processor
    and written to a file named after the video title.

    Args:
        link: URL handed to yt-dlp.

    Returns:
        The filename yt-dlp computes for the template '%(title)s.mp3'.

    Raises:
        RuntimeError: If yt-dlp returns no metadata for the link.
    """
    ydl_opts = {
        'format': 'bestaudio',
        'outtmpl': '%(title)s',
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_warnings': True,
        'quiet': True,
        'extractaudio': True,
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(link, download=True)
        # With 'ignoreerrors' enabled yt-dlp reports a failed extraction by
        # returning None instead of raising; fail here with a clear message
        # rather than passing None on to prepare_filename.
        if result is None:
            raise RuntimeError(f'Failed to download audio from {link}.')
        download_path = ydl.prepare_filename(result, outtmpl='%(title)s.mp3')

    return download_path
79
-
80
-
81
def raise_exception(error_msg, is_webui):
    """
    Raises an error carrying error_msg.

    Uses gr.Error when is_webui is truthy and a plain Exception otherwise.
    """
    error_type = gr.Error if is_webui else Exception
    raise error_type(error_msg)
86
-
87
-
88
def get_rvc_model(voice_model, is_webui):
    """
    Locates the model (.pth) and index (.index) files of a voice model.

    The directory rvc_models_dir/voice_model is listed; when several files
    share an extension, the last one listed wins.

    Args:
        voice_model: Name of the sub-directory under rvc_models_dir.
        is_webui: Forwarded to raise_exception to pick the error type.

    Returns:
        A (model_path, index_path) tuple; index_path is '' when the
        directory holds no .index file.
    """
    model_dir = os.path.join(rvc_models_dir, voice_model)

    # Remember the most recently listed file for each extension of interest.
    latest_by_ext = {}
    for entry in os.listdir(model_dir):
        extension = os.path.splitext(entry)[1]
        if extension in ('.pth', '.index'):
            latest_by_ext[extension] = entry

    weights_name = latest_by_ext.get('.pth')
    index_name = latest_by_ext.get('.index')

    if weights_name is None:
        raise_exception(f'No model file exists in {model_dir}.', is_webui)

    index_path = os.path.join(model_dir, index_name) if index_name else ''
    return os.path.join(model_dir, weights_name), index_path
103
-
104
-
105
def get_audio_paths(song_dir):
    """
    Finds previously generated stem files inside song_dir.

    Files are recognised by their name suffix. The original song path is
    derived from the instrumental path by removing '_Instrumental'.

    Returns:
        A tuple (orig_song_path, instrumentals_path,
        main_vocals_dereverb_path, backup_vocals_path); an entry is None
        when no matching file was found.
    """
    found = {
        '_Instrumental.wav': None,
        '_Vocals_Main_DeReverb.wav': None,
        '_Vocals_Backup.wav': None,
    }

    for name in os.listdir(song_dir):
        for suffix in found:
            if name.endswith(suffix):
                found[suffix] = os.path.join(song_dir, name)
                break

    instrumentals_path = found['_Instrumental.wav']
    if instrumentals_path is None:
        orig_song_path = None
    else:
        orig_song_path = instrumentals_path.replace('_Instrumental', '')

    return (
        orig_song_path,
        instrumentals_path,
        found['_Vocals_Main_DeReverb.wav'],
        found['_Vocals_Backup.wav'],
    )
123
-
124
-
125
def convert_to_stereo(audio_path):
    """
    Returns a path to a two-channel version of audio_path.

    The file is loaded with librosa (mono=False, 44100 Hz). If it decodes to
    a single channel, ffmpeg writes a 2-channel WAV next to it with a
    '_stereo.wav' suffix; otherwise the input path is returned unchanged.

    Args:
        audio_path: Path of the audio file to inspect.

    Returns:
        audio_path itself, or the path of the newly written stereo WAV.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    wave, _ = librosa.load(audio_path, mono=False, sr=44100)

    # With mono=False a single-channel file decodes to a 1-D array, while
    # multi-channel audio has a leading channel axis.
    if wave.ndim > 1:
        return audio_path

    stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
    # Pass the arguments as a list: building a string and re-splitting it
    # breaks on paths that contain quotes or backslashes.
    command = [
        'ffmpeg', '-y', '-loglevel', 'error',
        '-i', audio_path,
        '-ac', '2', '-f', 'wav', stereo_path,
    ]
    # check=True surfaces an ffmpeg failure here instead of returning a path
    # to a file that was never written.
    subprocess.run(command, check=True)
    return stereo_path
135
-
136
-
137
def pitch_shift(audio_path, pitch_change):
    """
    Writes a pitch-shifted copy of audio_path and returns its path.

    The output lives next to the input with a '_p<pitch_change>.wav' suffix.
    If that file already exists it is reused and nothing is recomputed.

    Args:
        audio_path: Path of the audio file to shift.
        pitch_change: Value passed to sox.Transformer.pitch.

    Returns:
        Path of the pitch-shifted file.
    """
    stem = os.path.splitext(audio_path)[0]
    output_path = f'{stem}_p{pitch_change}.wav'

    # A previous run already produced this file; reuse it.
    if os.path.exists(output_path):
        return output_path

    samples, sample_rate = sf.read(audio_path)
    transformer = sox.Transformer()
    transformer.pitch(pitch_change)
    shifted = transformer.build_array(input_array=samples, sample_rate_in=sample_rate)
    sf.write(output_path, shifted, sample_rate)

    return output_path
147
-
148
-
149
def get_hash(filepath):
    """
    Returns a short content hash of a file.

    The file is read in 8192-byte chunks into a BLAKE2b digest, and the
    first 11 hex characters of that digest are returned.
    """
    digest = hashlib.blake2b()
    with open(filepath, 'rb') as stream:
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)

    return digest.hexdigest()[:11]
156
-
157
-
158
def display_progress(message, percent, is_webui, progress=None):
    """
    Reports a progress message.

    Outside the web UI the message is printed. Inside the web UI the
    progress callable is invoked with percent and desc=message.
    """
    if not is_webui:
        print(message)
        return

    progress(percent, desc=message)
163
-
164
-
165
def preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress=None):
    """
    Obtains the song and splits it into stems with three run_mdx passes.

    Steps: download the song (input_type 'yt') or use the given path
    ('local'), make sure it is stereo, then run the Kim_Vocal_2,
    UVR_MDXNET_KARA_2 and Reverb_HQ_By_FoxJoy models in sequence.

    Args:
        song_input: YouTube link or local file path, depending on input_type.
        mdx_model_params: Passed through to run_mdx unchanged.
        song_id: Name of the per-song sub-directory under output_dir.
        is_webui: Forwarded to display_progress.
        input_type: 'yt' or 'local'; any other value leaves the source
            path as None.
        progress: Forwarded to display_progress.

    Returns:
        A tuple (orig_song_path, vocals_path, instrumentals_path,
        main_vocals_path, backup_vocals_path, main_vocals_dereverb_path).
    """
    is_local = input_type == 'local'

    if input_type == 'yt':
        display_progress('[~] Downloading song...', 0, is_webui, progress)
        # Only the part of the link before the first '&' is downloaded.
        source_path = yt_download(song_input.split('&')[0])
    elif is_local:
        source_path = song_input
    else:
        source_path = None

    song_output_dir = os.path.join(output_dir, song_id)
    orig_song_path = convert_to_stereo(source_path)

    display_progress('[~] Separating Vocals from Instrumental...', 0.1, is_webui, progress)
    vocal_model = os.path.join(mdxnet_models_dir, 'Kim_Vocal_2.onnx')
    vocals_path, instrumentals_path = run_mdx(
        mdx_model_params, song_output_dir, vocal_model, orig_song_path,
        denoise=True, keep_orig=is_local,
    )

    display_progress('[~] Separating Main Vocals from Backup Vocals...', 0.2, is_webui, progress)
    karaoke_model = os.path.join(mdxnet_models_dir, 'UVR_MDXNET_KARA_2.onnx')
    backup_vocals_path, main_vocals_path = run_mdx(
        mdx_model_params, song_output_dir, karaoke_model, vocals_path,
        suffix='Backup', invert_suffix='Main', denoise=True,
    )

    display_progress('[~] Applying DeReverb to Vocals...', 0.3, is_webui, progress)
    dereverb_model = os.path.join(mdxnet_models_dir, 'Reverb_HQ_By_FoxJoy.onnx')
    _, main_vocals_dereverb_path = run_mdx(
        mdx_model_params, song_output_dir, dereverb_model, main_vocals_path,
        invert_suffix='DeReverb', exclude_main=True, denoise=True,
    )

    return (
        orig_song_path,
        vocals_path,
        instrumentals_path,
        main_vocals_path,
        backup_vocals_path,
        main_vocals_dereverb_path,
    )
190
-
191
-
192
def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
    """
    Runs RVC inference on vocals_path and writes the result to output_path.

    The model and index files are resolved with get_rvc_model, the HuBERT
    and voice models are loaded on the hard-coded device 'cuda:0', and
    rvc_infer performs the conversion. The loaded HuBERT model and
    checkpoint are dropped afterwards and a garbage collection is forced.

    Args:
        voice_model: Name of the voice model directory.
        vocals_path: Input vocals file.
        output_path: Destination file handed to rvc_infer.
        pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate,
            protect, crepe_hop_length: Passed through to rvc_infer.
        is_webui: Forwarded to get_rvc_model.
    """
    model_path, index_path = get_rvc_model(voice_model, is_webui)

    device = 'cuda:0'
    # NOTE(review): the second Config argument presumably selects
    # half precision - confirm against infer_libs.
    config = Config(device, True)
    hubert_path = os.path.join(rvc_models_dir, 'hubert_base.pt')
    hubert_model = load_hubert(device, config.is_half, hubert_path)
    cpt, version, net_g, tgt_sr, vc = get_vc(device, config.is_half, config, model_path)

    rvc_infer(
        index_path, index_rate, vocals_path, output_path, pitch_change,
        f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate,
        protect, crepe_hop_length, vc, hubert_model,
    )

    # Release the references to the large models before collecting.
    del hubert_model, cpt
    gc.collect()
202
-
203
-
204
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
    """
    Applies a high-pass filter, compressor and reverb to an audio file.

    The result is written next to the input with a '_mixed.wav' suffix.

    Args:
        audio_path: File to process.
        reverb_rm_size: Reverb room_size.
        reverb_wet: Reverb wet_level.
        reverb_dry: Reverb dry_level.
        reverb_damping: Reverb damping.

    Returns:
        Path of the processed file.
    """
    output_path = f'{os.path.splitext(audio_path)[0]}_mixed.wav'

    effects = [
        HighpassFilter(),
        Compressor(ratio=4, threshold_db=-15),
        Reverb(
            room_size=reverb_rm_size,
            dry_level=reverb_dry,
            wet_level=reverb_wet,
            damping=reverb_damping,
        ),
    ]
    board = Pedalboard(effects)

    with AudioFile(audio_path) as source:
        with AudioFile(output_path, 'w', source.samplerate, source.num_channels) as sink:
            # Stream the file through the board, samplerate frames at a time.
            while source.tell() < source.frames:
                frames = source.read(int(source.samplerate))
                # reset=False: presumably keeps effect state (e.g. reverb
                # tails) across chunk boundaries - confirm in pedalboard docs.
                sink.write(board(frames, source.samplerate, reset=False))

    return output_path
223
-
224
-
225
def merge_audios(audio_paths, output_path):
    """
    Overlays several audio files and exports the mix as a WAV file.

    Args:
        audio_paths: Sequence of file paths; the first is the base track and
            every following one is overlaid on top of it.
        output_path: Destination of the exported WAV file.
    """
    mix = AudioSegment.from_file(audio_paths[0])
    for extra_path in audio_paths[1:]:
        layer = AudioSegment.from_file(extra_path)
        mix = mix.overlay(layer)

    mix.export(output_path, format='wav')
230
-
231
-
232
def process_and_save_song(song_input, input_type, voice_model, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping, progress, is_webui=False):
    """
    Runs the full cover pipeline and returns the path of the final mix.

    Pipeline: preprocess (download/separate) -> pitch shift the de-reverbed
    main vocals -> RVC voice conversion -> audio effects -> merge with the
    instrumental track.

    Args:
        song_input: Local file path or YouTube link, depending on input_type.
        input_type: 'local' or 'yt'.
        voice_model: Name of the RVC voice model directory.
        pitch_change: Used for both pitch_shift and voice_change.
        f0_method, index_rate, filter_radius, rms_mix_rate, protect,
            crepe_hop_length: Passed through to voice_change.
        reverb_rm_size, reverb_wet, reverb_dry, reverb_damping: Passed
            through to add_audio_effects.
        progress: Progress callable forwarded to display_progress (only
            called when is_webui is truthy).
        is_webui: Selects the error type and progress reporting style.

    Returns:
        Path of the merged output WAV inside output_dir.

    Raises:
        gr.Error or Exception (depending on is_webui) for an invalid
        YouTube link or when any processing step fails.
    """
    # A YouTube link is not a readable file, so it cannot be content-hashed;
    # validate it first and use its video ID as the song ID instead.
    if input_type == 'yt':
        song_id = get_youtube_video_id(song_input)
        if not song_id:
            raise_exception('[!] Invalid YouTube link.', is_webui)
    else:
        song_id = get_hash(song_input)

    # Creates output_dir as well when it does not exist yet.
    song_output_dir = os.path.join(output_dir, song_id)
    os.makedirs(song_output_dir, exist_ok=True)

    mdx_model_params = {
        'demucs_model_path': os.path.join(mdxnet_models_dir, 'models_demucs.h5'),
        'mdx_model_path': os.path.join(mdxnet_models_dir, 'models_mdx.h5'),
        'output_path': output_dir,
        'noise_protect': 0.33,
        'voc_model_path': os.path.join(mdxnet_models_dir, 'models_vocal.h5')
    }

    # Bound before the try so the cleanup below is safe even when
    # preprocessing fails before producing a path.
    orig_song_path = None
    try:
        orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress)

        display_progress('[~] Changing Main Vocals to Target Voice...', 0.4, is_webui, progress)
        pitch_shifted_main_vocals_path = pitch_shift(main_vocals_dereverb_path, pitch_change)
        output_vocals_path = os.path.join(song_output_dir, 'main_vocals_changed.wav')
        voice_change(voice_model, pitch_shifted_main_vocals_path, output_vocals_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)

        display_progress('[~] Adding Audio Effects...', 0.5, is_webui, progress)
        final_output_vocals_path = add_audio_effects(output_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)

        display_progress('[~] Merging Vocal and Instrumental Tracks...', 0.6, is_webui, progress)
        final_output_path = os.path.join(output_dir, f'{os.path.basename(orig_song_path)}_{voice_model}_vocal_conversion.wav')
        merge_audios([final_output_vocals_path, instrumentals_path], final_output_path)

        display_progress('[~] Done!', 1.0, is_webui, progress)
    except Exception as e:
        raise_exception(f'[!] Processing failed: {str(e)}', is_webui)
    finally:
        # Remove only intermediate files (download / stereo copy). When the
        # path is the caller's own input file it must be left untouched.
        if orig_song_path is not None and orig_song_path != song_input:
            with suppress(FileNotFoundError):
                os.remove(orig_song_path)

    return final_output_path
275
-
276
-
277
# Command-line entry point: parse the options and run the pipeline once.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Process song with RVC.')

    # Mandatory options.
    parser.add_argument('--input', type=str, required=True, help='Path to the input song file or YouTube link.')
    parser.add_argument('--type', type=str, required=True, choices=['local', 'yt'], help='Type of input: "local" for a file, "yt" for a YouTube link.')
    parser.add_argument('--voice_model', type=str, required=True, help='Name of the voice model to use.')

    # Optional tuning knobs: (flag, type, default, help), registered in order.
    optional_arguments = (
        ('--pitch_change', float, 0, 'Pitch change amount in semitones.'),
        ('--f0_method', str, 'crepe', 'F0 method to use.'),
        ('--index_rate', float, 1.0, 'Index rate.'),
        ('--filter_radius', float, 3.0, 'Filter radius.'),
        ('--rms_mix_rate', float, 0.25, 'RMS mix rate.'),
        ('--protect', float, 0.33, 'Protection rate.'),
        ('--crepe_hop_length', int, 128, 'Crepe hop length.'),
        ('--reverb_rm_size', float, 0.3, 'Reverb room size.'),
        ('--reverb_wet', float, 0.25, 'Reverb wet level.'),
        ('--reverb_dry', float, 0.75, 'Reverb dry level.'),
        ('--reverb_damping', float, 0.5, 'Reverb damping.'),
    )
    for flag, value_type, default_value, help_text in optional_arguments:
        parser.add_argument(flag, type=value_type, required=False, default=default_value, help=help_text)

    args = parser.parse_args()

    # No progress callback on the command line; messages are printed instead.
    process_and_save_song(
        song_input=args.input,
        input_type=args.type,
        voice_model=args.voice_model,
        pitch_change=args.pitch_change,
        f0_method=args.f0_method,
        index_rate=args.index_rate,
        filter_radius=args.filter_radius,
        rms_mix_rate=args.rms_mix_rate,
        protect=args.protect,
        crepe_hop_length=args.crepe_hop_length,
        reverb_rm_size=args.reverb_rm_size,
        reverb_wet=args.reverb_wet,
        reverb_dry=args.reverb_dry,
        reverb_damping=args.reverb_damping,
        progress=None,
    )