# Hex RVC — Streamlit app: RVC inference, model download, and audio separation.
import os
import re
import random
from scipy.io.wavfile import write, read
import numpy as np
import yt_dlp
import subprocess
from pydub import AudioSegment
from audio_separator.separator import Separator
from lib.infer import infer_audio
import edge_tts
import tempfile
import anyio
from pathlib import Path
from lib.language_tts import language_dict
import zipfile
import shutil
import urllib.request
import gdown
import streamlit as st
main_dir = Path().resolve()
print(main_dir)
os.chdir(main_dir)
models_dir = "models"
# Download audio using yt-dlp
def download_audio(url):
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': 'ytdl/%(title)s.%(ext)s',
'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav', 'preferredquality': '192'}],
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info_dict = ydl.extract_info(url, download=True)
file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
sample_rate, audio_data = read(file_path)
audio_array = np.asarray(audio_data, dtype=np.int16)
return sample_rate, audio_array
def separate_audio(input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc):
if not os.path.exists(output_dir):
os.makedirs(output_dir)
separator = Separator(output_dir=output_dir)
vocals = os.path.join(output_dir, 'Vocals.wav')
instrumental = os.path.join(output_dir, 'Instrumental.wav')
vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')
separator.load_model(model_filename=model_voc_inst)
voc_inst = separator.separate(input_audio)
os.rename(os.path.join(output_dir, voc_inst[0]), instrumental)
os.rename(os.path.join(output_dir, voc_inst[1]), vocals)
separator.load_model(model_filename=model_deecho)
voc_no_reverb = separator.separate(vocals)
os.rename(os.path.join(output_dir, voc_no_reverb[0]), vocals_no_reverb)
os.rename(os.path.join(output_dir, voc_no_reverb[1]), vocals_reverb)
separator.load_model(model_filename=model_back_voc)
backing_voc = separator.separate(vocals_no_reverb)
os.rename(os.path.join(output_dir, backing_voc[0]), backing_vocals)
os.rename(os.path.join(output_dir, backing_voc[1]), lead_vocals)
return instrumental, vocals, vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals
async def text_to_speech_edge(text, language_code):
voice = language_dict.get(language_code, "default_voice")
communicate = edge_tts.Communicate(text, voice)
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
tmp_path = tmp_file.name
await communicate.save(tmp_path)
return tmp_path
# Streamlit UI
st.title("Hex RVC")
tabs = st.tabs(["Inference", "Download RVC Model", "Audio Separation"])
# Inference Tab
with tabs[0]:
st.header("Inference")
model_name = st.text_input("Model Name", placeholder="Enter model name")
sound_path = st.text_input("Audio Path (Optional)", placeholder="Leave blank to upload audio")
uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])
if uploaded_audio is not None:
with open("uploaded_audio.wav", "wb") as f:
f.write(uploaded_audio.read())
sound_path = "uploaded_audio.wav"
f0_change = st.number_input("Pitch Change (semitones)", value=0)
f0_method = st.selectbox("F0 Method", ["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", "hybrid[rmvpe+fcpe]"], index=5)
if st.button("Run Inference"):
st.write("Running inference...")
# Download RVC Model Tab
with tabs[1]:
st.header("Download RVC Model")
url = st.text_input("Model URL")
dir_name = st.text_input("Model Name")
if st.button("Download Model"):
try:
download_online_model(url, dir_name)
st.success(f"Model {dir_name} downloaded successfully!")
except Exception as e:
st.error(str(e))
# Audio Separation Tab
with tabs[2]:
st.header("Audio Separation")
input_audio = st.file_uploader("Upload Audio for Separation", type=["wav", "mp3"])
if input_audio is not None:
with open("input_audio.wav", "wb") as f:
f.write(input_audio.read())
st.write("Audio uploaded successfully.")
if st.button("Separate Audio"):
st.write("Separating audio...")
output_dir = "./separated_audio"
inst, voc, voc_rev, voc_no_rev, lead_voc, back_voc = separate_audio("input_audio.wav", output_dir,
'model_bs_roformer.ckpt',
'UVR-DeEcho-DeReverb.pth',
'mel_band_karaoke.ckpt')
st.audio(inst)
st.audio(voc)
st.audio(voc_rev)
st.audio(voc_no_rev)
st.audio(lead_voc)
st.audio(back_voc)