Delete lib/main.py
Browse files- lib/main.py +0 -312
lib/main.py
DELETED
@@ -1,312 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
import gc
|
3 |
-
import hashlib
|
4 |
-
import json
|
5 |
-
import os
|
6 |
-
import shlex
|
7 |
-
import subprocess
|
8 |
-
from contextlib import suppress
|
9 |
-
from urllib.parse import urlparse, parse_qs
|
10 |
-
|
11 |
-
import gradio as gr
|
12 |
-
import librosa
|
13 |
-
import numpy as np
|
14 |
-
import soundfile as sf
|
15 |
-
import sox
|
16 |
-
import yt_dlp
|
17 |
-
from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
|
18 |
-
from pedalboard.io import AudioFile
|
19 |
-
from pydub import AudioSegment
|
20 |
-
|
21 |
-
from mdx import run_mdx
|
22 |
-
from infer_libs.rvc.py import Config, load_hubert, get_vc, rvc_infer
|
23 |
-
|
24 |
-
# Absolute directory that holds this file; every resource path is anchored here.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Sibling folders for the MDX-Net separation models, the RVC voice models
# and the generated song outputs, in that order.
mdxnet_models_dir, rvc_models_dir, output_dir = (
    os.path.join(BASE_DIR, folder)
    for folder in ('mdxnet_models', 'assets', 'song_output')
)
|
29 |
-
|
30 |
-
|
31 |
-
def get_youtube_video_id(url, ignore_playlist=True):
    """
    Extract the video ID (or playlist ID) from a YouTube URL.

    Args:
        url: The URL to inspect.
        ignore_playlist: When False and a youtube.com-style URL carries a
            ``list`` query parameter, return that playlist ID instead of the
            video ID.

    Returns:
        The extracted ID string, or None when the host is not a recognised
        YouTube host or the URL carries no ID.
    """
    parsed = urlparse(url)
    params = parse_qs(parsed.query)

    if parsed.hostname == 'youtu.be':
        if parsed.path[1:] == 'watch':
            # https://youtu.be/watch?v=<id>: read the parameter by name so
            # trailing parameters such as "&t=10" are not glued to the ID.
            return params.get('v', [None])[0]
        return parsed.path[1:] or None

    if parsed.hostname in {'www.youtube.com', 'youtube.com', 'music.youtube.com'}:
        if not ignore_playlist and 'list' in params:
            return params['list'][0]
        if parsed.path == '/watch':
            # A bare "/watch" without "v" is not a valid link; report None
            # instead of raising KeyError.
            return params.get('v', [None])[0]
        for prefix in ('/watch/', '/embed/', '/v/', '/shorts/'):
            if parsed.path.startswith(prefix):
                # '/<kind>/<id>/...'.split('/') -> ['', kind, id, ...]
                return parsed.path.split('/')[2] or None

    return None
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
def yt_download(link):
    """
    Download the best audio format from a YouTube link and convert it to MP3.

    Args:
        link: URL handed to yt-dlp.

    Returns:
        The file name yt-dlp derives from the ``'%(title)s.mp3'`` template.
        The template has no directory component.

    Raises:
        RuntimeError: If yt-dlp returns no metadata for the link.
    """
    ydl_opts = {
        'format': 'bestaudio',
        'outtmpl': '%(title)s',
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_warnings': True,
        'quiet': True,
        'extractaudio': True,
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(link, download=True)
        if result is None:
            # With 'ignoreerrors' enabled, extraction failures may surface as
            # a None result instead of an exception; fail here with a clear
            # message rather than inside prepare_filename.
            raise RuntimeError(f'Failed to download audio from {link}.')
        download_path = ydl.prepare_filename(result, outtmpl='%(title)s.mp3')

    return download_path
|
79 |
-
|
80 |
-
|
81 |
-
def raise_exception(error_msg, is_webui):
    """
    Raise an error carrying ``error_msg``.

    A ``gr.Error`` is raised when ``is_webui`` is truthy; otherwise a plain
    ``Exception`` is raised. This function never returns normally.
    """
    if is_webui:
        raise gr.Error(error_msg)
    raise Exception(error_msg)
|
86 |
-
|
87 |
-
|
88 |
-
def get_rvc_model(voice_model, is_webui):
    """
    Locate the ``.pth`` model file and optional ``.index`` file of a voice model.

    The folder ``rvc_models_dir/<voice_model>`` is scanned. If several files
    share an extension, the last one returned by ``os.listdir`` is used.

    Returns:
        ``(model_path, index_path)``; ``index_path`` is ``''`` when the
        folder has no ``.index`` file.

    Raises:
        Whatever ``raise_exception`` raises when no ``.pth`` file is found.
    """
    model_dir = os.path.join(rvc_models_dir, voice_model)

    # Later directory entries overwrite earlier ones for the same extension.
    found = {'.pth': None, '.index': None}
    for entry in os.listdir(model_dir):
        suffix = os.path.splitext(entry)[1]
        if suffix in found:
            found[suffix] = entry

    rvc_model_filename = found['.pth']
    rvc_index_filename = found['.index']

    if rvc_model_filename is None:
        raise_exception(f'No model file exists in {model_dir}.', is_webui)

    model_path = os.path.join(model_dir, rvc_model_filename)
    if rvc_index_filename:
        index_path = os.path.join(model_dir, rvc_index_filename)
    else:
        index_path = ''
    return model_path, index_path
|
103 |
-
|
104 |
-
|
105 |
-
def get_audio_paths(song_dir):
    """
    Find previously separated stems inside ``song_dir`` by file-name suffix.

    Returns:
        ``(orig_song_path, instrumentals_path, main_vocals_dereverb_path,
        backup_vocals_path)``. An entry is None when no matching file exists.
        ``orig_song_path`` is the instrumental path with ``'_Instrumental'``
        removed; it is derived, not checked for existence.
    """
    orig_song_path = instrumentals_path = None
    main_vocals_dereverb_path = backup_vocals_path = None

    for entry in os.listdir(song_dir):
        full_path = os.path.join(song_dir, entry)

        if entry.endswith('_Instrumental.wav'):
            instrumentals_path = full_path
            orig_song_path = full_path.replace('_Instrumental', '')
            continue

        if entry.endswith('_Vocals_Main_DeReverb.wav'):
            main_vocals_dereverb_path = full_path
            continue

        if entry.endswith('_Vocals_Backup.wav'):
            backup_vocals_path = full_path

    return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
|
123 |
-
|
124 |
-
|
125 |
-
def convert_to_stereo(audio_path):
    """
    Return a path to a multi-channel version of ``audio_path``.

    The file is loaded with librosa at 44100 Hz. If it decodes to a single
    channel, ffmpeg writes a two-channel copy to ``<stem>_stereo.wav`` and
    that path is returned. Otherwise ``audio_path`` is returned unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    wave, _ = librosa.load(audio_path, mono=False, sr=44100)

    # With mono=False a single-channel file decodes to a 1-D array.
    if wave.ndim != 1:
        return audio_path

    stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
    # Pass argv as a list so paths containing quotes or backslashes are
    # handed to ffmpeg verbatim instead of being re-tokenised.
    command = [
        'ffmpeg', '-y', '-loglevel', 'error',
        '-i', audio_path,
        '-ac', '2',
        '-f', 'wav', stereo_path,
    ]
    # check=True: a failed conversion must not return a path to a file that
    # was never written.
    subprocess.run(command, check=True)
    return stereo_path
|
135 |
-
|
136 |
-
|
137 |
-
def pitch_shift(audio_path, pitch_change):
    """
    Write a pitch-shifted copy of ``audio_path`` and return its path.

    The result is stored as ``<stem>_p<pitch_change>.wav``. If that file
    already exists it is reused and no processing happens.
    """
    stem = os.path.splitext(audio_path)[0]
    output_path = f'{stem}_p{pitch_change}.wav'

    # Reuse an earlier result for the same input and shift amount.
    if os.path.exists(output_path):
        return output_path

    samples, sample_rate = sf.read(audio_path)
    shifter = sox.Transformer()
    shifter.pitch(pitch_change)
    shifted = shifter.build_array(input_array=samples, sample_rate_in=sample_rate)
    sf.write(output_path, shifted, sample_rate)

    return output_path
|
147 |
-
|
148 |
-
|
149 |
-
def get_hash(filepath):
    """
    Return the first 11 hex characters of the BLAKE2b digest of a file.

    The file is read in 8192-byte chunks, so it is never held in memory whole.
    """
    with open(filepath, 'rb') as stream:
        digest = hashlib.blake2b()
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)

    return digest.hexdigest()[:11]
|
156 |
-
|
157 |
-
|
158 |
-
def display_progress(message, percent, is_webui, progress=None):
    """
    Report a progress message.

    When ``is_webui`` is falsy the message is printed. Otherwise ``progress``
    is called as ``progress(percent, desc=message)``.
    """
    if not is_webui:
        print(message)
        return
    progress(percent, desc=message)
|
163 |
-
|
164 |
-
|
165 |
-
def preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress=None):
    """
    Obtain the source song and split it into stems with three ``run_mdx`` passes.

    Args:
        song_input: A YouTube link (``input_type == 'yt'``) or a local file
            path (``input_type == 'local'``).
        mdx_model_params: Passed through unchanged to every ``run_mdx`` call.
        song_id: Name of the per-song sub-folder under ``output_dir``.
        is_webui: Selects the progress and error reporting style.
        input_type: ``'yt'`` or ``'local'``.
        progress: Progress callback forwarded to ``display_progress``.

    Returns:
        ``(orig_song_path, vocals_path, instrumentals_path, main_vocals_path,
        backup_vocals_path, main_vocals_dereverb_path)``.

    Raises:
        Whatever ``raise_exception`` raises when ``input_type`` is unknown.
    """
    keep_orig = False
    if input_type == 'yt':
        display_progress('[~] Downloading song...', 0, is_webui, progress)
        # Drop everything after the first '&' (e.g. playlist or timestamp
        # parameters) before downloading.
        song_link = song_input.split('&')[0]
        orig_song_path = yt_download(song_link)
    elif input_type == 'local':
        orig_song_path = song_input
        keep_orig = True
    else:
        # Fail here with a clear message; otherwise a None path would only
        # blow up later inside the audio loader.
        raise_exception(f'[!] Unsupported input type: {input_type}', is_webui)

    song_output_dir = os.path.join(output_dir, song_id)
    orig_song_path = convert_to_stereo(orig_song_path)

    display_progress('[~] Separating Vocals from Instrumental...', 0.1, is_webui, progress)
    vocals_path, instrumentals_path = run_mdx(mdx_model_params, song_output_dir, os.path.join(mdxnet_models_dir, 'Kim_Vocal_2.onnx'), orig_song_path, denoise=True, keep_orig=keep_orig)

    display_progress('[~] Separating Main Vocals from Backup Vocals...', 0.2, is_webui, progress)
    backup_vocals_path, main_vocals_path = run_mdx(mdx_model_params, song_output_dir, os.path.join(mdxnet_models_dir, 'UVR_MDXNET_KARA_2.onnx'), vocals_path, suffix='Backup', invert_suffix='Main', denoise=True)

    display_progress('[~] Applying DeReverb to Vocals...', 0.3, is_webui, progress)
    _, main_vocals_dereverb_path = run_mdx(mdx_model_params, song_output_dir, os.path.join(mdxnet_models_dir, 'Reverb_HQ_By_FoxJoy.onnx'), main_vocals_path, invert_suffix='DeReverb', exclude_main=True, denoise=True)

    return orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path
|
190 |
-
|
191 |
-
|
192 |
-
def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
    """
    Run RVC inference on ``vocals_path`` and write the result to ``output_path``.

    The model and index files are resolved through ``get_rvc_model``. The
    HuBERT model is loaded from ``hubert_base.pt`` in ``rvc_models_dir``.
    Inference is pinned to device ``'cuda:0'``. Returns None.
    """
    model_path, index_path = get_rvc_model(voice_model, is_webui)

    device = 'cuda:0'
    config = Config(device, True)
    hubert_path = os.path.join(rvc_models_dir, 'hubert_base.pt')
    hubert_model = load_hubert(device, config.is_half, hubert_path)
    cpt, version, net_g, tgt_sr, vc = get_vc(device, config.is_half, config, model_path)

    rvc_infer(
        index_path, index_rate, vocals_path, output_path, pitch_change,
        f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate,
        protect, crepe_hop_length, vc, hubert_model,
    )

    # Drop the local references to the loaded models and force a collection pass.
    del hubert_model, cpt
    gc.collect()
|
202 |
-
|
203 |
-
|
204 |
-
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
    """
    Apply a high-pass filter, compressor and reverb to an audio file.

    The processed audio is written next to the input as ``<stem>_mixed.wav``,
    with the same sample rate and channel count, and that path is returned.
    """
    stem = os.path.splitext(audio_path)[0]
    output_path = f'{stem}_mixed.wav'

    highpass = HighpassFilter()
    compressor = Compressor(ratio=4, threshold_db=-15)
    reverb = Reverb(room_size=reverb_rm_size, dry_level=reverb_dry, wet_level=reverb_wet, damping=reverb_damping)
    board = Pedalboard([highpass, compressor, reverb])

    with AudioFile(audio_path) as src, AudioFile(output_path, 'w', src.samplerate, src.num_channels) as dst:
        # Stream the file in blocks of `samplerate` frames. reset=False
        # carries the effect state over from one block to the next.
        while src.tell() < src.frames:
            block = src.read(int(src.samplerate))
            processed = board(block, src.samplerate, reset=False)
            dst.write(processed)

    return output_path
|
223 |
-
|
224 |
-
|
225 |
-
def merge_audios(audio_paths, output_path):
    """
    Overlay every file in ``audio_paths`` onto the first one and export as WAV.

    ``audio_paths`` must contain at least one path. Each further path is
    overlaid onto the running mix in order. Returns None.
    """
    mixed = AudioSegment.from_file(audio_paths[0])
    for extra_path in audio_paths[1:]:
        layer = AudioSegment.from_file(extra_path)
        mixed = mixed.overlay(layer)
    mixed.export(output_path, format='wav')
|
230 |
-
|
231 |
-
|
232 |
-
def process_and_save_song(song_input, input_type, voice_model, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping, progress, is_webui=False):
    """
    Run the full pipeline: separate stems, convert the main vocals, add effects, mix.

    Args:
        song_input: Local file path or YouTube link, depending on ``input_type``.
        input_type: ``'local'`` or ``'yt'``.
        voice_model: Name of the model folder under ``rvc_models_dir``.
        pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate,
            protect, crepe_hop_length: Forwarded to ``voice_change``;
            ``pitch_change`` is also passed to ``pitch_shift``.
        reverb_rm_size, reverb_wet, reverb_dry, reverb_damping: Forwarded to
            ``add_audio_effects``.
        progress: Progress callback used when ``is_webui`` is truthy.
        is_webui: Selects the progress and error reporting style.

    Returns:
        Path of the final mixed WAV file inside ``output_dir``.

    Raises:
        Whatever ``raise_exception`` raises for an invalid YouTube link or
        for any failure during processing.
    """
    if input_type == 'yt':
        # A link cannot be opened and hashed like a file; the video ID is the
        # song identifier and also serves as link validation.
        song_id = get_youtube_video_id(song_input)
        if not song_id:
            raise_exception('[!] Invalid YouTube link.', is_webui)
    else:
        song_id = get_hash(song_input)

    song_output_dir = os.path.join(output_dir, song_id)
    # Creates output_dir as well when it is missing.
    os.makedirs(song_output_dir, exist_ok=True)

    mdx_model_params = {
        'demucs_model_path': os.path.join(mdxnet_models_dir, 'models_demucs.h5'),
        'mdx_model_path': os.path.join(mdxnet_models_dir, 'models_mdx.h5'),
        'output_path': output_dir,
        'noise_protect': 0.33,
        'voc_model_path': os.path.join(mdxnet_models_dir, 'models_vocal.h5')
    }

    # Bound before the try block so the cleanup in `finally` is safe even
    # when preprocessing itself raises.
    orig_song_path = None
    try:
        orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress)

        display_progress('[~] Changing Main Vocals to Target Voice...', 0.4, is_webui, progress)
        # NOTE(review): pitch_change is applied here and is also forwarded to
        # voice_change below - confirm the double use is intended.
        pitch_shifted_main_vocals_path = pitch_shift(main_vocals_dereverb_path, pitch_change)
        output_vocals_path = os.path.join(song_output_dir, 'main_vocals_changed.wav')
        voice_change(voice_model, pitch_shifted_main_vocals_path, output_vocals_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)

        display_progress('[~] Adding Audio Effects...', 0.5, is_webui, progress)
        final_output_vocals_path = add_audio_effects(output_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)

        display_progress('[~] Merging Vocal and Instrumental Tracks...', 0.6, is_webui, progress)
        final_output_path = os.path.join(output_dir, f'{os.path.basename(orig_song_path)}_{voice_model}_vocal_conversion.wav')
        merge_audios([final_output_vocals_path, instrumentals_path], final_output_path)

        display_progress('[~] Done!', 1.0, is_webui, progress)
    except Exception as e:
        raise_exception(f'[!] Processing failed: {str(e)}', is_webui)
    finally:
        # Remove only files this run produced (a download or a "_stereo"
        # conversion); never delete the caller's own input file.
        if orig_song_path is not None and orig_song_path != song_input:
            with suppress(FileNotFoundError):
                os.remove(orig_song_path)

    return final_output_path
|
275 |
-
|
276 |
-
|
277 |
-
if __name__ == '__main__':
    # Command-line entry point: parse the options and run the pipeline once.
    cli = argparse.ArgumentParser(description='Process song with RVC.')

    # Mandatory arguments.
    cli.add_argument('--input', type=str, required=True, help='Path to the input song file or YouTube link.')
    cli.add_argument('--type', type=str, required=True, choices=['local', 'yt'], help='Type of input: "local" for a file, "yt" for a YouTube link.')
    cli.add_argument('--voice_model', type=str, required=True, help='Name of the voice model to use.')

    # Optional tuning knobs as (flag, type, default, help), registered in order.
    optional_args = (
        ('--pitch_change', float, 0, 'Pitch change amount in semitones.'),
        ('--f0_method', str, 'crepe', 'F0 method to use.'),
        ('--index_rate', float, 1.0, 'Index rate.'),
        ('--filter_radius', float, 3.0, 'Filter radius.'),
        ('--rms_mix_rate', float, 0.25, 'RMS mix rate.'),
        ('--protect', float, 0.33, 'Protection rate.'),
        ('--crepe_hop_length', int, 128, 'Crepe hop length.'),
        ('--reverb_rm_size', float, 0.3, 'Reverb room size.'),
        ('--reverb_wet', float, 0.25, 'Reverb wet level.'),
        ('--reverb_dry', float, 0.75, 'Reverb dry level.'),
        ('--reverb_damping', float, 0.5, 'Reverb damping.'),
    )
    for flag, arg_type, default_value, help_text in optional_args:
        cli.add_argument(flag, type=arg_type, required=False, default=default_value, help=help_text)

    opts = cli.parse_args()

    # No progress callback on the command line; messages are printed instead.
    process_and_save_song(
        song_input=opts.input,
        input_type=opts.type,
        voice_model=opts.voice_model,
        pitch_change=opts.pitch_change,
        f0_method=opts.f0_method,
        index_rate=opts.index_rate,
        filter_radius=opts.filter_radius,
        rms_mix_rate=opts.rms_mix_rate,
        protect=opts.protect,
        crepe_hop_length=opts.crepe_hop_length,
        reverb_rm_size=opts.reverb_rm_size,
        reverb_wet=opts.reverb_wet,
        reverb_dry=opts.reverb_dry,
        reverb_damping=opts.reverb_damping,
        progress=None
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|