import os
import tempfile
import urllib.request
import zipfile
from pathlib import Path

import anyio
import edge_tts
import gdown
import numpy as np
import streamlit as st
import yt_dlp
from audio_separator.separator import Separator
from scipy.io.wavfile import read, write

from lib.infer import infer_audio
from lib.language_tts import language_dict

# Anchor relative paths (models/, ytdl/, separated_audio/) to the launch directory.
main_dir = Path().resolve()
os.chdir(main_dir)
models_dir = "models"

# Download the best audio stream from a URL with yt-dlp and return it as
# (sample_rate, int16 numpy array).
def download_audio(url):
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        # FFmpegExtractAudio re-encodes to PCM WAV; 'preferredquality' only
        # applies to lossy codecs and is harmless here.
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav', 'preferredquality': '192'}],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # prepare_filename() reports the pre-postprocessing name; swap its
        # extension for the .wav the postprocessor actually wrote.
        file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
        sample_rate, audio_data = read(file_path)
        audio_array = np.asarray(audio_data, dtype=np.int16)
        return sample_rate, audio_array


# Three-pass separation with audio-separator:
#   1) split vocals from instrumental,
#   2) remove echo/reverb from the vocals,
#   3) split lead from backing vocals.
def separate_audio(input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc):
    os.makedirs(output_dir, exist_ok=True)

    separator = Separator(output_dir=output_dir)

    vocals = os.path.join(output_dir, 'Vocals.wav')
    instrumental = os.path.join(output_dir, 'Instrumental.wav')
    vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
    vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
    lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
    backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')

    # separate() returns the output filenames; the renames below assume the
    # stem order each model emits (instrumental/vocals, no-reverb/reverb,
    # backing/lead) -- verify this against the models you actually load.
    separator.load_model(model_filename=model_voc_inst)
    voc_inst = separator.separate(input_audio)
    os.rename(os.path.join(output_dir, voc_inst[0]), instrumental)
    os.rename(os.path.join(output_dir, voc_inst[1]), vocals)

    separator.load_model(model_filename=model_deecho)
    voc_no_reverb = separator.separate(vocals)
    os.rename(os.path.join(output_dir, voc_no_reverb[0]), vocals_no_reverb)
    os.rename(os.path.join(output_dir, voc_no_reverb[1]), vocals_reverb)

    separator.load_model(model_filename=model_back_voc)
    backing_voc = separator.separate(vocals_no_reverb)
    os.rename(os.path.join(output_dir, backing_voc[0]), backing_vocals)
    os.rename(os.path.join(output_dir, backing_voc[1]), lead_vocals)

    return instrumental, vocals, vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals


# Synthesize `text` with Microsoft Edge TTS and return the path to the audio file.
async def text_to_speech_edge(text, language_code):
    # Fall back to a real Edge voice; the original "default_voice" placeholder
    # is not a valid voice name and would make edge-tts fail.
    voice = language_dict.get(language_code, "en-US-AriaNeural")
    communicate = edge_tts.Communicate(text, voice)
    # edge-tts emits MP3, so label the temp file accordingly; close the handle
    # before saving so the write also succeeds on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path

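# `download_online_model` is called from the "Download RVC Model" tab below but
# was never defined in this file. This is a minimal sketch reconstructed from
# the otherwise-unused imports (gdown, urllib.request, zipfile): it assumes the
# URL points at a zip of model weights and unpacks it into models/<dir_name>.
def download_online_model(url, dir_name):
    extraction_dir = os.path.join(models_dir, dir_name)
    if os.path.exists(extraction_dir):
        raise Exception(f'Model directory "{extraction_dir}" already exists.')
    os.makedirs(extraction_dir, exist_ok=True)

    zip_path = os.path.join(extraction_dir, 'model.zip')
    if 'drive.google.com' in url:
        # gdown resolves Google Drive share links (fuzzy=True accepts page URLs).
        gdown.download(url, zip_path, fuzzy=True)
    else:
        urllib.request.urlretrieve(url, zip_path)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extraction_dir)
    os.remove(zip_path)
    return extraction_dir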

# Streamlit UI (launch with: streamlit run <this_file>.py)
st.title("Hex RVC")

tabs = st.tabs(["Inference", "Download RVC Model", "Audio Separation", "TTS"])

# Inference Tab
with tabs[0]:
    st.header("Inference")
    
    model_name = st.text_input("Model Name", placeholder="Enter model name")
    sound_path = st.text_input("Audio Path (Optional)", placeholder="Leave blank to upload audio")
    uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])

    if uploaded_audio is not None:
        with open("uploaded_audio.wav", "wb") as f:
            f.write(uploaded_audio.read())
        sound_path = "uploaded_audio.wav"
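
    # Optional YouTube source: `download_audio` above is otherwise unused, so
    # wiring it in here is an assumed intent. The returned array is written
    # back to disk with scipy so the rest of the pipeline sees a file path.
    # Streamlit reruns the script on every interaction, so the download is
    # skipped once the file exists.
    yt_url = st.text_input("YouTube URL (Optional)", placeholder="Leave blank to use the path/upload above")
    if yt_url and not sound_path:
        if not os.path.exists("yt_audio.wav"):
            sample_rate, audio_array = download_audio(yt_url)
            write("yt_audio.wav", sample_rate, audio_array)
        sound_path = "yt_audio.wav"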

    f0_change = st.number_input("Pitch Change (semitones)", value=0)
    f0_method = st.selectbox("F0 Method", ["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", "hybrid[rmvpe+fcpe]"], index=5)
    
    if st.button("Run Inference"):
        if not model_name or not sound_path:
            st.error("Provide a model name and an audio source first.")
        else:
            st.write("Running inference...")
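            # `infer_audio` is imported from lib.infer; its exact signature is
            # project-specific, so the argument names below (mirroring the UI
            # fields) are an assumption rather than a confirmed API.
            output_path = infer_audio(model_name, sound_path, f0_change=f0_change, f0_method=f0_method)
            st.audio(output_path)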

# Download RVC Model Tab
with tabs[1]:
    st.header("Download RVC Model")
    url = st.text_input("Model URL")
    dir_name = st.text_input("Model Name")

    if st.button("Download Model"):
        try:
            download_online_model(url, dir_name)
            st.success(f"Model {dir_name} downloaded successfully!")
        except Exception as e:
            st.error(str(e))

# Audio Separation Tab
with tabs[2]:
    st.header("Audio Separation")
    input_audio = st.file_uploader("Upload Audio for Separation", type=["wav", "mp3"])

    if input_audio is not None:
        with open("input_audio.wav", "wb") as f:
            f.write(input_audio.read())
        st.write("Audio uploaded successfully.")

        if st.button("Separate Audio"):
            st.write("Separating audio...")
            output_dir = "./separated_audio"
            # These model filenames must match checkpoints that audio-separator
            # can resolve; the names here appear abbreviated, so substitute the
            # full filenames from your audio-separator installation if needed.
            inst, voc, voc_rev, voc_no_rev, lead_voc, back_voc = separate_audio(
                "input_audio.wav", output_dir,
                'model_bs_roformer.ckpt',
                'UVR-DeEcho-DeReverb.pth',
                'mel_band_karaoke.ckpt')
            for stem in (inst, voc, voc_rev, voc_no_rev, lead_voc, back_voc):
                st.audio(stem)
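
# Text-to-Speech tab: `text_to_speech_edge` above is defined but otherwise
# unused, so exposing it here is an assumed intent (the "TTS" tab was added
# to st.tabs above). anyio.run drives the async helper from Streamlit's
# synchronous script thread.
with tabs[3]:
    st.header("Text to Speech")
    tts_text = st.text_area("Text to Synthesize")
    tts_language = st.selectbox("Language", list(language_dict.keys()))

    if st.button("Generate Speech"):
        if not tts_text:
            st.error("Enter some text first.")
        else:
            audio_path = anyio.run(text_to_speech_edge, tts_text, tts_language)
            st.audio(audio_path)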