Spaces:

ArkanDash
/

rvc-genshin-impact

Running on CPU Upgrade

File size: 3,197 Bytes

773691e

import os
import wave
import subprocess
import yt_dlp
import ffmpeg
import logging
from fairseq import checkpoint_utils
logger = logging.getLogger(__name__)

def load_hubert(config):
    """Load the HuBERT base checkpoint and prepare it for inference.

    The checkpoint is read from ``assets/hubert/hubert_base.pt`` relative to
    the current working directory. A missing file is only logged; loading is
    still attempted afterwards.

    Args:
        config: Object exposing ``device`` (handed to ``.to()``) and
            ``is_half`` (truthy selects ``.half()``, otherwise ``.float()``).

    Returns:
        The first model of the loaded ensemble, moved to ``config.device``,
        cast to the selected precision and switched to eval mode.
    """
    hubert_path = os.path.join("assets", "hubert", "hubert_base.pt")
    if os.path.exists(hubert_path):
        logger.info("hubert_base.pt found.")
    else:
        # ``Logger.warn`` is a deprecated alias that newer Python versions
        # no longer provide; ``Logger.warning`` is the supported spelling.
        logger.warning("hubert_base.pt is missing. Please check the documentation for to get it.")
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        [hubert_path],
        suffix="",
    )
    hubert_model = models[0].to(config.device)
    # Match the precision the rest of the pipeline was configured for.
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
    return hubert_model

def download_audio(url, audio_provider):
    """Download the audio track behind *url* and stream progress logs.

    This is a generator. Every item is an ``(audio_path, log_text)`` tuple in
    which ``audio_path`` stays ``None`` until the download has finished.

    Args:
        url: Address of the media to fetch. ``None``, empty and
            whitespace-only values are rejected with a log message.
        audio_provider: Name of the source; only ``"Youtube"`` is handled.

    Yields:
        ``(None, logs)`` while working or when the request is rejected, then
        ``("yt/audio.wav", logs)`` once yt-dlp has finished.
    """
    logs = []
    # Normalise first so None / blank input never reaches yt-dlp.
    url = (url or "").strip()
    if not url:
        logs.append("URL required!")
        yield None, "\n".join(logs)
        return
    if audio_provider != "Youtube":
        # Report the problem instead of finishing without any output.
        logs.append(f"Unsupported audio provider: {audio_provider}")
        yield None, "\n".join(logs)
        return
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("yt", exist_ok=True)
    logs.append("Downloading the audio...")
    yield None, "\n".join(logs)
    ydl_opts = {
        'noplaylist': True,
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
        }],
        # The post-processor appends the ".wav" extension to this template.
        "outtmpl": 'yt/audio',
    }
    audio_path = "yt/audio.wav"
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    logs.append("Download Complete.")
    yield audio_path, "\n".join(logs)

def cut_vocal_and_inst(split_model):
    """Split ``yt/audio.wav`` into vocals and accompaniment using demucs.

    This is a generator. Every item is a 4-tuple
    ``(log_text, vocal_path, inst_path, vocal_path)``; the three path slots
    are ``None`` until demucs has finished successfully.

    Args:
        split_model: Name of the demucs model, passed to ``-n`` and used as
            the sub-directory of ``output`` that holds the stems.

    Yields:
        Progress tuples while demucs prints to stdout, then either the final
        tuple with the stem paths or, when demucs exits with a non-zero
        status, a tuple whose log ends with a failure message.
    """
    logs = ["Starting the audio splitting process..."]
    yield "\n".join(logs), None, None, None
    # An argument list keeps a model name containing whitespace intact,
    # unlike splitting a formatted command string.
    command = [
        "demucs",
        "--two-stems=vocals",
        "-n",
        str(split_model),
        "yt/audio.wav",
        "-o",
        "output",
    ]
    # The context manager closes the pipe and waits for the child process.
    with subprocess.Popen(command, stdout=subprocess.PIPE, text=True) as process:
        for line in process.stdout:
            # Lines already end with a newline; drop it so joining does not
            # produce blank lines between entries.
            logs.append(line.rstrip("\n"))
            yield "\n".join(logs), None, None, None
    logger.info("demucs exited with code %s", process.returncode)
    if process.returncode != 0:
        logs.append(f"Audio splitting failed (exit code {process.returncode}).")
        yield "\n".join(logs), None, None, None
        return
    vocal = f"output/{split_model}/audio/vocals.wav"
    inst = f"output/{split_model}/audio/no_vocals.wav"
    logs.append("Audio splitting complete.")
    yield "\n".join(logs), vocal, inst, vocal

def combine_vocal_and_inst(audio_data, vocal_volume, inst_volume, split_model):
    """Mix the converted vocals with the separated instrumental into an MP3.

    The vocals are first written to ``output/result/output.wav`` and then
    mixed by ffmpeg with ``output/<split_model>/audio/no_vocals.wav``.

    Args:
        audio_data: ``(sample_rate, samples)`` pair; ``samples`` must provide
            ``tobytes()``. The data is written as mono with a 2-byte sample
            width — assumes 16-bit integer samples; TODO confirm with caller.
        vocal_volume: Value for ffmpeg's ``volume`` filter on the vocals.
        inst_volume: Value for ffmpeg's ``volume`` filter on the instrumental.
        split_model: Demucs model name that selects the instrumental stem.

    Returns:
        ``"output/result/combine.mp3"``. The path is returned even when ffmpeg
        fails; the failure is reported through the module logger.
    """
    # makedirs also creates the parent "output" directory when it is missing.
    os.makedirs("output/result", exist_ok=True)
    vocal_path = "output/result/output.wav"
    output_path = "output/result/combine.mp3"
    inst_path = f"output/{split_model}/audio/no_vocals.wav"
    sample_rate, samples = audio_data[0], audio_data[1]
    with wave.open(vocal_path, "w") as wave_file:
        wave_file.setnchannels(1)
        wave_file.setsampwidth(2)
        wave_file.setframerate(sample_rate)
        wave_file.writeframes(samples.tobytes())
    filter_graph = (
        f"[0:a]volume={inst_volume}[i];"
        f"[1:a]volume={vocal_volume}[v];"
        "[i][v]amix=inputs=2:duration=longest[a]"
    )
    # An argument list keeps paths containing whitespace intact.
    command = [
        "ffmpeg", "-y",
        "-i", inst_path,
        "-i", vocal_path,
        "-filter_complex", filter_graph,
        "-map", "[a]",
        "-b:a", "320k",
        "-c:a", "libmp3lame",
        output_path,
    ]
    # ffmpeg prints its diagnostics on stderr, so merge it into the captured
    # stream; otherwise the logged output is empty.
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    logger.info(result.stdout.decode(errors="replace"))
    if result.returncode != 0:
        logger.error("ffmpeg exited with code %s while writing %s", result.returncode, output_path)
    return output_path