|
import os |
|
import re |
|
import random |
|
from scipy.io.wavfile import write |
|
from scipy.io.wavfile import read |
|
import numpy as np |
|
import gradio as gr |
|
import yt_dlp |
|
import subprocess |
|
from pydub import AudioSegment |
|
from audio_separator.separator import Separator |
|
from lib.infer import infer_audio |
|
import edge_tts |
|
import tempfile |
|
import anyio |
|
|
|
|
|
# Maps the label shown in the "Choose the Voice Model" dropdown to the
# edge-tts voice identifier passed to ``edge_tts.Communicate``.
# NOTE(review): a few labels carry cosmetic oddities ('Malese', 'UAD',
# 'Yemen '); they are kept as-is because the labels are user-visible keys.
language_dict = {
    'English-Jenny (Female)': 'en-US-JennyNeural',
    'English-Guy (Male)': 'en-US-GuyNeural',
    'English-Ana (Female)': 'en-US-AnaNeural',
    'English-Aria (Female)': 'en-US-AriaNeural',
    'English-Christopher (Male)': 'en-US-ChristopherNeural',
    'English-Eric (Male)': 'en-US-EricNeural',
    'English-Michelle (Female)': 'en-US-MichelleNeural',
    'English-Roger (Male)': 'en-US-RogerNeural',
    'Spanish (Mexican)-Dalia (Female)': 'es-MX-DaliaNeural',
    'Spanish (Mexican)-Jorge- (Male)': 'es-MX-JorgeNeural',
    'Korean-Sun-Hi- (Female)': 'ko-KR-SunHiNeural',
    'Korean-InJoon- (Male)': 'ko-KR-InJoonNeural',
    'Thai-Premwadee- (Female)': 'th-TH-PremwadeeNeural',
    'Thai-Niwat- (Male)': 'th-TH-NiwatNeural',
    'Vietnamese-HoaiMy- (Female)': 'vi-VN-HoaiMyNeural',
    'Vietnamese-NamMinh- (Male)': 'vi-VN-NamMinhNeural',
    'Japanese-Nanami- (Female)': 'ja-JP-NanamiNeural',
    'Japanese-Keita- (Male)': 'ja-JP-KeitaNeural',
    'French-Denise- (Female)': 'fr-FR-DeniseNeural',
    'French-Eloise- (Female)': 'fr-FR-EloiseNeural',
    'French-Henri- (Male)': 'fr-FR-HenriNeural',
    'Brazilian-Francisca- (Female)': 'pt-BR-FranciscaNeural',
    'Brazilian-Antonio- (Male)': 'pt-BR-AntonioNeural',
    'Indonesian-Ardi- (Male)': 'id-ID-ArdiNeural',
    'Indonesian-Gadis- (Female)': 'id-ID-GadisNeural',
    'Hebrew-Avri- (Male)': 'he-IL-AvriNeural',
    'Hebrew-Hila- (Female)': 'he-IL-HilaNeural',
    'Italian-Isabella- (Female)': 'it-IT-IsabellaNeural',
    'Italian-Diego- (Male)': 'it-IT-DiegoNeural',
    'Italian-Elsa- (Female)': 'it-IT-ElsaNeural',
    'Dutch-Colette- (Female)': 'nl-NL-ColetteNeural',
    'Dutch-Fenna- (Female)': 'nl-NL-FennaNeural',
    'Dutch-Maarten- (Male)': 'nl-NL-MaartenNeural',
    'Malese-Osman- (Male)': 'ms-MY-OsmanNeural',
    'Malese-Yasmin- (Female)': 'ms-MY-YasminNeural',
    'Norwegian-Pernille- (Female)': 'nb-NO-PernilleNeural',
    'Norwegian-Finn- (Male)': 'nb-NO-FinnNeural',
    'Swedish-Sofie- (Female)': 'sv-SE-SofieNeural',
    # Was labelled 'ArabicSwedish-…' although the voice is Swedish (sv-SE).
    'Swedish-Mattias- (Male)': 'sv-SE-MattiasNeural',
    'Arabic-Hamed- (Male)': 'ar-SA-HamedNeural',
    'Arabic-Zariyah- (Female)': 'ar-SA-ZariyahNeural',
    'Greek-Athina- (Female)': 'el-GR-AthinaNeural',
    'Greek-Nestoras- (Male)': 'el-GR-NestorasNeural',
    'German-Katja- (Female)': 'de-DE-KatjaNeural',
    'German-Amala- (Female)': 'de-DE-AmalaNeural',
    'German-Conrad- (Male)': 'de-DE-ConradNeural',
    'German-Killian- (Male)': 'de-DE-KillianNeural',
    'Afrikaans-Adri- (Female)': 'af-ZA-AdriNeural',
    'Afrikaans-Willem- (Male)': 'af-ZA-WillemNeural',
    'Ethiopian-Ameha- (Male)': 'am-ET-AmehaNeural',
    'Ethiopian-Mekdes- (Female)': 'am-ET-MekdesNeural',
    'Arabic (UAD)-Fatima- (Female)': 'ar-AE-FatimaNeural',
    'Arabic (UAD)-Hamdan- (Male)': 'ar-AE-HamdanNeural',
    'Arabic (Bahrain)-Ali- (Male)': 'ar-BH-AliNeural',
    'Arabic (Bahrain)-Laila- (Female)': 'ar-BH-LailaNeural',
    'Arabic (Algeria)-Ismael- (Male)': 'ar-DZ-IsmaelNeural',
    'Arabic (Egypt)-Salma- (Female)': 'ar-EG-SalmaNeural',
    'Arabic (Egypt)-Shakir- (Male)': 'ar-EG-ShakirNeural',
    'Arabic (Iraq)-Bassel- (Male)': 'ar-IQ-BasselNeural',
    'Arabic (Iraq)-Rana- (Female)': 'ar-IQ-RanaNeural',
    'Arabic (Jordan)-Sana- (Female)': 'ar-JO-SanaNeural',
    'Arabic (Jordan)-Taim- (Male)': 'ar-JO-TaimNeural',
    'Arabic (Kuwait)-Fahed- (Male)': 'ar-KW-FahedNeural',
    'Arabic (Kuwait)-Noura- (Female)': 'ar-KW-NouraNeural',
    'Arabic (Lebanon)-Layla- (Female)': 'ar-LB-LaylaNeural',
    'Arabic (Lebanon)-Rami- (Male)': 'ar-LB-RamiNeural',
    'Arabic (Libya)-Iman- (Female)': 'ar-LY-ImanNeural',
    'Arabic (Libya)-Omar- (Male)': 'ar-LY-OmarNeural',
    'Arabic (Morocco)-Jamal- (Male)': 'ar-MA-JamalNeural',
    'Arabic (Morocco)-Mouna- (Female)': 'ar-MA-MounaNeural',
    'Arabic (Oman)-Abdullah- (Male)': 'ar-OM-AbdullahNeural',
    'Arabic (Oman)-Aysha- (Female)': 'ar-OM-AyshaNeural',
    'Arabic (Qatar)-Amal- (Female)': 'ar-QA-AmalNeural',
    'Arabic (Qatar)-Moaz- (Male)': 'ar-QA-MoazNeural',
    'Arabic (Syrian Arab Republic)-Amany- (Female)': 'ar-SY-AmanyNeural',
    'Arabic (Syrian Arab Republic)-Laith- (Male)': 'ar-SY-LaithNeural',
    'Arabic (Tunisia)-Hedi- (Male)': 'ar-TN-HediNeural',
    'Arabic (Tunisia)-Reem- (Female)': 'ar-TN-ReemNeural',
    'Arabic (Yemen )-Maryam- (Female)': 'ar-YE-MaryamNeural',
    'Arabic (Yemen )-Saleh- (Male)': 'ar-YE-SalehNeural',
    'Azerbaijani-Babek- (Male)': 'az-AZ-BabekNeural',
    'Azerbaijani-Banu- (Female)': 'az-AZ-BanuNeural',
    'Bulgarian-Borislav- (Male)': 'bg-BG-BorislavNeural',
    'Bulgarian-Kalina- (Female)': 'bg-BG-KalinaNeural',
    'Bengali (Bangladesh)-Nabanita- (Female)': 'bn-BD-NabanitaNeural',
    'Bengali (Bangladesh)-Pradeep- (Male)': 'bn-BD-PradeepNeural',
    'Bengali (India)-Bashkar- (Male)': 'bn-IN-BashkarNeural',
    'Bengali (India)-Tanishaa- (Female)': 'bn-IN-TanishaaNeural',
    'Bosniak (Bosnia and Herzegovina)-Goran- (Male)': 'bs-BA-GoranNeural',
    'Bosniak (Bosnia and Herzegovina)-Vesna- (Female)': 'bs-BA-VesnaNeural',
    'Catalan (Spain)-Joana- (Female)': 'ca-ES-JoanaNeural',
    'Catalan (Spain)-Enric- (Male)': 'ca-ES-EnricNeural',
    'Czech (Czech Republic)-Antonin- (Male)': 'cs-CZ-AntoninNeural',
    'Czech (Czech Republic)-Vlasta- (Female)': 'cs-CZ-VlastaNeural',
    'Welsh (UK)-Aled- (Male)': 'cy-GB-AledNeural',
    'Welsh (UK)-Nia- (Female)': 'cy-GB-NiaNeural',
    'Danish (Denmark)-Christel- (Female)': 'da-DK-ChristelNeural',
    'Danish (Denmark)-Jeppe- (Male)': 'da-DK-JeppeNeural',
    'German (Austria)-Ingrid- (Female)': 'de-AT-IngridNeural',
    'German (Austria)-Jonas- (Male)': 'de-AT-JonasNeural',
    'German (Switzerland)-Jan- (Male)': 'de-CH-JanNeural',
    'German (Switzerland)-Leni- (Female)': 'de-CH-LeniNeural',
    'English (Australia)-Natasha- (Female)': 'en-AU-NatashaNeural',
    'English (Australia)-William- (Male)': 'en-AU-WilliamNeural',
    'English (Canada)-Clara- (Female)': 'en-CA-ClaraNeural',
    'English (Canada)-Liam- (Male)': 'en-CA-LiamNeural',
    'English (UK)-Libby- (Female)': 'en-GB-LibbyNeural',
    'English (UK)-Maisie- (Female)': 'en-GB-MaisieNeural',
    'English (UK)-Ryan- (Male)': 'en-GB-RyanNeural',
    'English (UK)-Sonia- (Female)': 'en-GB-SoniaNeural',
    'English (UK)-Thomas- (Male)': 'en-GB-ThomasNeural',
    'English (Hong Kong)-Sam- (Male)': 'en-HK-SamNeural',
    'English (Hong Kong)-Yan- (Female)': 'en-HK-YanNeural',
    'English (Ireland)-Connor- (Male)': 'en-IE-ConnorNeural',
    'English (Ireland)-Emily- (Female)': 'en-IE-EmilyNeural',
    'English (India)-Neerja- (Female)': 'en-IN-NeerjaNeural',
    'English (India)-Prabhat- (Male)': 'en-IN-PrabhatNeural',
    'English (Kenya)-Asilia- (Female)': 'en-KE-AsiliaNeural',
    'English (Kenya)-Chilemba- (Male)': 'en-KE-ChilembaNeural',
    'English (Nigeria)-Abeo- (Male)': 'en-NG-AbeoNeural',
    'English (Nigeria)-Ezinne- (Female)': 'en-NG-EzinneNeural',
    'English (New Zealand)-Mitchell- (Male)': 'en-NZ-MitchellNeural',
    'English (Philippines)-James- (Male)': 'en-PH-JamesNeural',
    'English (Philippines)-Rosa- (Female)': 'en-PH-RosaNeural',
    'English (Singapore)-Luna- (Female)': 'en-SG-LunaNeural',
    'English (Singapore)-Wayne- (Male)': 'en-SG-WayneNeural',
    'English (Tanzania)-Elimu- (Male)': 'en-TZ-ElimuNeural',
    'English (Tanzania)-Imani- (Female)': 'en-TZ-ImaniNeural',
    'English (South Africa)-Leah- (Female)': 'en-ZA-LeahNeural',
    'English (South Africa)-Luke- (Male)': 'en-ZA-LukeNeural',
    'Spanish (Argentina)-Elena- (Female)': 'es-AR-ElenaNeural',
    'Spanish (Argentina)-Tomas- (Male)': 'es-AR-TomasNeural',
    'Spanish (Bolivia)-Marcelo- (Male)': 'es-BO-MarceloNeural',
    'Spanish (Bolivia)-Sofia- (Female)': 'es-BO-SofiaNeural',
    'Spanish (Colombia)-Gonzalo- (Male)': 'es-CO-GonzaloNeural',
    'Spanish (Colombia)-Salome- (Female)': 'es-CO-SalomeNeural',
    'Spanish (Costa Rica)-Juan- (Male)': 'es-CR-JuanNeural',
    'Spanish (Costa Rica)-Maria- (Female)': 'es-CR-MariaNeural',
    'Spanish (Cuba)-Belkys- (Female)': 'es-CU-BelkysNeural',
    'Spanish (Dominican Republic)-Emilio- (Male)': 'es-DO-EmilioNeural',
    'Spanish (Dominican Republic)-Ramona- (Female)': 'es-DO-RamonaNeural',
    'Spanish (Ecuador)-Andrea- (Female)': 'es-EC-AndreaNeural',
    'Spanish (Ecuador)-Luis- (Male)': 'es-EC-LuisNeural',
    'Spanish (Spain)-Alvaro- (Male)': 'es-ES-AlvaroNeural',
    'Spanish (Spain)-Elvira- (Female)': 'es-ES-ElviraNeural',
    'Spanish (Equatorial Guinea)-Teresa- (Female)': 'es-GQ-TeresaNeural',
    'Spanish (Guatemala)-Andres- (Male)': 'es-GT-AndresNeural',
    'Spanish (Guatemala)-Marta- (Female)': 'es-GT-MartaNeural',
    'Spanish (Honduras)-Carlos- (Male)': 'es-HN-CarlosNeural',
    'Spanish (Honduras)-Karla- (Female)': 'es-HN-KarlaNeural',
    'Spanish (Nicaragua)-Federico- (Male)': 'es-NI-FedericoNeural',
    'Spanish (Nicaragua)-Yolanda- (Female)': 'es-NI-YolandaNeural',
    'Spanish (Panama)-Margarita- (Female)': 'es-PA-MargaritaNeural',
    'Spanish (Panama)-Roberto- (Male)': 'es-PA-RobertoNeural',
    'Spanish (Peru)-Alex- (Male)': 'es-PE-AlexNeural',
    'Spanish (Peru)-Camila- (Female)': 'es-PE-CamilaNeural',
    'Spanish (Puerto Rico)-Karina- (Female)': 'es-PR-KarinaNeural',
    'Spanish (Puerto Rico)-Victor- (Male)': 'es-PR-VictorNeural',
    'Spanish (Paraguay)-Mario- (Male)': 'es-PY-MarioNeural',
    'Spanish (Paraguay)-Tania- (Female)': 'es-PY-TaniaNeural',
    'Spanish (El Salvador)-Lorena- (Female)': 'es-SV-LorenaNeural',
    'Spanish (El Salvador)-Rodrigo- (Male)': 'es-SV-RodrigoNeural',
    'Spanish (United States)-Alonso- (Male)': 'es-US-AlonsoNeural',
    'Spanish (United States)-Paloma- (Female)': 'es-US-PalomaNeural',
    'Spanish (Uruguay)-Mateo- (Male)': 'es-UY-MateoNeural',
    'Spanish (Uruguay)-Valentina- (Female)': 'es-UY-ValentinaNeural',
    'Spanish (Venezuela)-Paola- (Female)': 'es-VE-PaolaNeural',
    'Spanish (Venezuela)-Sebastian- (Male)': 'es-VE-SebastianNeural',
    'Estonian (Estonia)-Anu- (Female)': 'et-EE-AnuNeural',
    'Estonian (Estonia)-Kert- (Male)': 'et-EE-KertNeural',
    'Persian (Iran)-Dilara- (Female)': 'fa-IR-DilaraNeural',
    'Persian (Iran)-Farid- (Male)': 'fa-IR-FaridNeural',
    'Finnish (Finland)-Harri- (Male)': 'fi-FI-HarriNeural',
    'Finnish (Finland)-Noora- (Female)': 'fi-FI-NooraNeural',
    'French (Belgium)-Charline- (Female)': 'fr-BE-CharlineNeural',
    'French (Belgium)-Gerard- (Male)': 'fr-BE-GerardNeural',
    'French (Canada)-Sylvie- (Female)': 'fr-CA-SylvieNeural',
    'French (Canada)-Antoine- (Male)': 'fr-CA-AntoineNeural',
    'French (Canada)-Jean- (Male)': 'fr-CA-JeanNeural',
    'French (Switzerland)-Ariane- (Female)': 'fr-CH-ArianeNeural',
    'French (Switzerland)-Fabrice- (Male)': 'fr-CH-FabriceNeural',
    'Irish (Ireland)-Colm- (Male)': 'ga-IE-ColmNeural',
    'Irish (Ireland)-Orla- (Female)': 'ga-IE-OrlaNeural',
    'Galician (Spain)-Roi- (Male)': 'gl-ES-RoiNeural',
    'Galician (Spain)-Sabela- (Female)': 'gl-ES-SabelaNeural',
    'Gujarati (India)-Dhwani- (Female)': 'gu-IN-DhwaniNeural',
    'Gujarati (India)-Niranjan- (Male)': 'gu-IN-NiranjanNeural',
    'Hindi (India)-Madhur- (Male)': 'hi-IN-MadhurNeural',
    'Hindi (India)-Swara- (Female)': 'hi-IN-SwaraNeural',
    'Croatian (Croatia)-Gabrijela- (Female)': 'hr-HR-GabrijelaNeural',
    'Croatian (Croatia)-Srecko- (Male)': 'hr-HR-SreckoNeural',
    'Hungarian (Hungary)-Noemi- (Female)': 'hu-HU-NoemiNeural',
    'Hungarian (Hungary)-Tamas- (Male)': 'hu-HU-TamasNeural',
    'Icelandic (Iceland)-Gudrun- (Female)': 'is-IS-GudrunNeural',
    'Icelandic (Iceland)-Gunnar- (Male)': 'is-IS-GunnarNeural',
    'Javanese (Indonesia)-Dimas- (Male)': 'jv-ID-DimasNeural',
    'Javanese (Indonesia)-Siti- (Female)': 'jv-ID-SitiNeural',
    'Georgian (Georgia)-Eka- (Female)': 'ka-GE-EkaNeural',
    'Georgian (Georgia)-Giorgi- (Male)': 'ka-GE-GiorgiNeural',
    'Kazakh (Kazakhstan)-Aigul- (Female)': 'kk-KZ-AigulNeural',
    'Kazakh (Kazakhstan)-Daulet- (Male)': 'kk-KZ-DauletNeural',
    'Khmer (Cambodia)-Piseth- (Male)': 'km-KH-PisethNeural',
    'Khmer (Cambodia)-Sreymom- (Female)': 'km-KH-SreymomNeural',
    'Kannada (India)-Gagan- (Male)': 'kn-IN-GaganNeural',
    'Kannada (India)-Sapna- (Female)': 'kn-IN-SapnaNeural',
    'Lao (Laos)-Chanthavong- (Male)': 'lo-LA-ChanthavongNeural',
    'Lao (Laos)-Keomany- (Female)': 'lo-LA-KeomanyNeural',
    'Lithuanian (Lithuania)-Leonas- (Male)': 'lt-LT-LeonasNeural',
    'Lithuanian (Lithuania)-Ona- (Female)': 'lt-LT-OnaNeural',
    'Latvian (Latvia)-Everita- (Female)': 'lv-LV-EveritaNeural',
    'Latvian (Latvia)-Nils- (Male)': 'lv-LV-NilsNeural',
    'Macedonian (North Macedonia)-Aleksandar- (Male)': 'mk-MK-AleksandarNeural',
    'Macedonian (North Macedonia)-Marija- (Female)': 'mk-MK-MarijaNeural',
    'Malayalam (India)-Midhun- (Male)': 'ml-IN-MidhunNeural',
    'Malayalam (India)-Sobhana- (Female)': 'ml-IN-SobhanaNeural',
    'Mongolian (Mongolia)-Bataa- (Male)': 'mn-MN-BataaNeural',
    'Mongolian (Mongolia)-Yesui- (Female)': 'mn-MN-YesuiNeural',
    'Marathi (India)-Aarohi- (Female)': 'mr-IN-AarohiNeural',
    'Marathi (India)-Manohar- (Male)': 'mr-IN-ManoharNeural',
    'Maltese (Malta)-Grace- (Female)': 'mt-MT-GraceNeural',
    'Maltese (Malta)-Joseph- (Male)': 'mt-MT-JosephNeural',
    'Burmese (Myanmar)-Nilar- (Female)': 'my-MM-NilarNeural',
    'Burmese (Myanmar)-Thiha- (Male)': 'my-MM-ThihaNeural',
    'Nepali (Nepal)-Hemkala- (Female)': 'ne-NP-HemkalaNeural',
    'Nepali (Nepal)-Sagar- (Male)': 'ne-NP-SagarNeural',
    'Dutch (Belgium)-Arnaud- (Male)': 'nl-BE-ArnaudNeural',
    'Dutch (Belgium)-Dena- (Female)': 'nl-BE-DenaNeural',
    'Polish (Poland)-Marek- (Male)': 'pl-PL-MarekNeural',
    'Polish (Poland)-Zofia- (Female)': 'pl-PL-ZofiaNeural',
    # Was the truncated id 'ps-AF-Gul', which is not a valid edge-tts voice.
    'Pashto (Afghanistan)-Gul Nawaz- (Male)': 'ps-AF-GulNawazNeural',
}
|
|
|
|
|
|
|
def download_audio(url):
    """Fetch the audio behind *url* with yt-dlp and load it as PCM samples.

    yt-dlp downloads the best available audio stream into ``ytdl/`` and its
    ``FFmpegExtractAudio`` post-processor converts it to WAV. The converted
    file is then read back from disk.

    Args:
        url: Link handed to ``yt_dlp.YoutubeDL.extract_info``.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is the WAV data
        converted to a NumPy ``int16`` array.
    """
    wav_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [wav_extractor],
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(url, download=True)
        # prepare_filename reports the pre-conversion name, so the extension
        # is swapped for the one the post-processor produces.
        downloaded_name = downloader.prepare_filename(info)
        wav_path = downloaded_name.rsplit('.', 1)[0] + '.wav'
        rate, raw_samples = read(wav_path)
        samples = np.asarray(raw_samples, dtype=np.int16)

    return rate, samples
|
|
|
|
|
|
|
|
|
|
|
def separate_audio(input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc):
    """Split a track into six stems by chaining three separation models.

    Stages, each run through the same ``Separator`` instance:

    1. ``model_voc_inst`` on *input_audio*  -> Instrumental / Vocals
    2. ``model_deecho`` on the vocals       -> Vocals (No Reverb) / Vocals (Reverb)
    3. ``model_back_voc`` on the dry vocals -> Backing Vocals / Lead Vocals

    Each stage's first two output files are moved to fixed names inside
    *output_dir*. Which stem a model emits first is taken from the existing
    index mapping and is not verified here.

    Args:
        input_audio: Path of the audio file to separate.
        output_dir: Directory that receives the stems; created if missing.
        model_voc_inst: Model filename for the vocal/instrumental split.
        model_deecho: Model filename for the de-echo/de-reverb split.
        model_back_voc: Model filename for the lead/backing vocal split.

    Returns:
        Paths in the order ``(instrumental, vocals, vocals_reverb,
        vocals_no_reverb, lead_vocals, backing_vocals)``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    separator = Separator(output_dir=output_dir)

    vocals = os.path.join(output_dir, 'Vocals.wav')
    instrumental = os.path.join(output_dir, 'Instrumental.wav')
    vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
    vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
    lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
    backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')

    def run_stage(model_filename, source, first_target, second_target):
        """Run one model on *source* and move its two outputs into place."""
        separator.load_model(model_filename=model_filename)
        produced = separator.separate(source)
        # os.replace overwrites stems left by a previous run on every
        # platform; os.rename raises on Windows when the target exists.
        os.replace(os.path.join(output_dir, produced[0]), first_target)
        os.replace(os.path.join(output_dir, produced[1]), second_target)

    run_stage(model_voc_inst, input_audio, instrumental, vocals)
    run_stage(model_deecho, vocals, vocals_no_reverb, vocals_reverb)
    run_stage(model_back_voc, vocals_no_reverb, backing_vocals, lead_vocals)

    return instrumental, vocals, vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals
|
|
|
|
|
|
|
def _resolve_uploaded_audio(upload_audio, upload_dir="uploaded_audio"):
    """Return a filesystem path for an upload handed over by the UI.

    A path (``str`` / ``os.PathLike``) is used as-is, since the file already
    exists on disk. A file-like object exposing ``.name`` and ``.read()`` is
    copied into *upload_dir* under its base name.
    """
    if isinstance(upload_audio, (str, os.PathLike)):
        return os.fspath(upload_audio)

    os.makedirs(upload_dir, exist_ok=True)
    # basename() keeps the copy inside upload_dir: joining with an absolute
    # ``.name`` would discard upload_dir and truncate the very file about to
    # be read.
    destination = os.path.join(upload_dir, os.path.basename(upload_audio.name))
    with open(destination, "wb") as copy:
        copy.write(upload_audio.read())
    return destination


def process_audio(MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH, CREPE_HOP_LENGTH, INDEX_RATE,
                  FILTER_RADIUS, RMS_MIX_RATE, PROTECT, SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP,
                  KEEP_SILENCE, FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT, upload_audio=None):
    """Run voice-conversion inference for the "Run Inference" button.

    Args:
        MODEL_NAME: Name of the voice model; required.
        SOUND_PATH: Path of the input audio. When empty, *upload_audio* is
            used instead.
        F0_CHANGE ... OUTPUT_FORMAT: Inference settings forwarded unchanged,
            in this order, to ``infer_audio``.
        upload_audio: Optional upload from the UI, either a file path or a
            file-like object with ``.name`` and ``.read()``.

    Returns:
        Whatever ``infer_audio`` returns, or the string
        ``"Please provide a model name."`` when *MODEL_NAME* is empty.
    """
    # Validate first so a missing model name does not cause file writes.
    if not MODEL_NAME:
        return "Please provide a model name."

    if not SOUND_PATH and upload_audio is not None:
        SOUND_PATH = _resolve_uploaded_audio(upload_audio)

    # Ensure the pitch-shift helper binary in the working directory is executable.
    os.system("chmod +x stftpitchshift")
    inferred_audio = infer_audio(
        MODEL_NAME,
        SOUND_PATH,
        F0_CHANGE,
        F0_METHOD,
        MIN_PITCH,
        MAX_PITCH,
        CREPE_HOP_LENGTH,
        INDEX_RATE,
        FILTER_RADIUS,
        RMS_MIX_RATE,
        PROTECT,
        SPLIT_INFER,
        MIN_SILENCE,
        SILENCE_THRESHOLD,
        SEEK_STEP,
        KEEP_SILENCE,
        FORMANT_SHIFT,
        QUEFRENCY,
        TIMBRE,
        F0_AUTOTUNE,
        OUTPUT_FORMAT
    )

    return inferred_audio
|
|
|
|
|
async def text_to_speech_edge(text, language_code):
    """Synthesize *text* with edge-tts and return the path of the audio file.

    Args:
        text: Text to speak.
        language_code: A key of ``language_dict`` (the label chosen in the
            voice dropdown). Unknown or missing labels fall back to
            ``en-US-JennyNeural``.

    Returns:
        Path of a newly created temporary file holding the speech. The file
        is not deleted automatically.
    """
    # The previous fallback, "default_voice", is not a real voice id, so the
    # request always failed when no voice was selected.
    voice = language_dict.get(language_code, 'en-US-JennyNeural')
    communicate = edge_tts.Communicate(text, voice)

    # Only reserve a unique path here; the handle is closed before edge-tts
    # writes, because Windows cannot reopen a still-open NamedTemporaryFile.
    # The suffix is .mp3 because edge-tts writes MP3 data.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    await communicate.save(tmp_path)
    return tmp_path
|
|
|
|
|
|
|
|
|
# Gradio UI: an "Inference" tab (voice conversion plus a TTS helper that
# feeds the upload slot) and an "Audio Separation" tab (stem splitting).
# NOTE(review): the layout nesting below is reconstructed from statement
# order — confirm against the intended design.
with gr.Blocks(title="Hex RVC") as app:
    gr.Markdown("# Hex RVC")

    with gr.Tab("Inference"):
        with gr.Row():
            MODEL_NAME = gr.Textbox(label="Model Name", placeholder="Enter model name")
            SOUND_PATH = gr.Textbox(label="Audio Path (Optional)", placeholder="Leave blank to upload audio")
            upload_audio = gr.File(label="Upload Audio", type='filepath', file_types=["audio"])

        with gr.Row():
            F0_CHANGE = gr.Number(label="Pitch Change (semitones)", value=0)
            F0_METHOD = gr.Dropdown(choices=["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe",
                                             "hybrid[mangio-crepe+rmvpe]", "hybrid[mangio-crepe+fcpe]",
                                             "hybrid[rmvpe+fcpe]", "hybrid[mangio-crepe+rmvpe+fcpe]"],
                                    label="F0 Method", value="fcpe")

        with gr.Row():
            MIN_PITCH = gr.Textbox(label="Min Pitch", value="50")
            MAX_PITCH = gr.Textbox(label="Max Pitch", value="1100")
            CREPE_HOP_LENGTH = gr.Number(label="Crepe Hop Length", value=120)
            INDEX_RATE = gr.Slider(label="Index Rate", minimum=0, maximum=1, value=0.75)
            FILTER_RADIUS = gr.Number(label="Filter Radius", value=3)
            RMS_MIX_RATE = gr.Slider(label="RMS Mix Rate", minimum=0, maximum=1, value=0.25)
            PROTECT = gr.Slider(label="Protect", minimum=0, maximum=1, value=0.33)

        with gr.Accordion("Hex TTS"):
            input_text = gr.Textbox(lines=5, label="Input Text")
            language = gr.Dropdown(choices=list(language_dict.keys()), label="Choose the Voice Model")
            tts_convert = gr.Button("Convert")
            # The synthesized file lands in the upload slot so it can be used
            # as the inference input. Fixed: missing comma after ``fn=...``
            # and ``output=`` instead of ``outputs=``.
            tts_convert.click(fn=text_to_speech_edge, inputs=[input_text, language], outputs=upload_audio)

        with gr.Accordion("Advanced Settings", open=False):
            SPLIT_INFER = gr.Checkbox(label="Enable Split Inference", value=False)
            MIN_SILENCE = gr.Number(label="Min Silence (ms)", value=500)
            SILENCE_THRESHOLD = gr.Number(label="Silence Threshold (dBFS)", value=-50)
            SEEK_STEP = gr.Slider(label="Seek Step (ms)", minimum=1, maximum=10, value=1)
            KEEP_SILENCE = gr.Number(label="Keep Silence (ms)", value=200)
            FORMANT_SHIFT = gr.Checkbox(label="Enable Formant Shift", value=False)
            QUEFRENCY = gr.Number(label="Quefrency", value=0)
            TIMBRE = gr.Number(label="Timbre", value=1)
            F0_AUTOTUNE = gr.Checkbox(label="Enable F0 Autotune", value=False)
            OUTPUT_FORMAT = gr.Dropdown(choices=["wav", "flac", "mp3"], label="Output Format", value="wav")

        run_button = gr.Button("Run Inference")
        output_audio = gr.Audio(label="Generated Audio", type='filepath')

        # The input order must match process_audio's positional parameters.
        run_button.click(
            process_audio,
            inputs=[MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH, CREPE_HOP_LENGTH, INDEX_RATE,
                    FILTER_RADIUS, RMS_MIX_RATE, PROTECT, SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP,
                    KEEP_SILENCE, FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT, upload_audio],
            outputs=output_audio
        )

    with gr.Tab("Audio Separation"):
        with gr.Row():
            input_audio = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
            output_dir = gr.Textbox(value="/content/output", label="Output Directory")

        with gr.Accordion("Separation by Link", open=False):
            with gr.Row():
                roformer_link = gr.Textbox(
                    label="Link",
                    placeholder="Paste the link here",
                    interactive=True
                )
            with gr.Row():
                gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
            with gr.Row():
                roformer_download_button = gr.Button(
                    "Download!",
                    variant="primary"
                )

            # The downloaded audio is placed into the separation input.
            roformer_download_button.click(download_audio, [roformer_link], [input_audio])

        with gr.Row():
            model_voc_inst = gr.Textbox(value='model_bs_roformer_ep_317_sdr_12.9755.ckpt', label="Vocal & Instrumental Model")
            model_deecho = gr.Textbox(value='UVR-DeEcho-DeReverb.pth', label="DeEcho-DeReverb Model")
            model_back_voc = gr.Textbox(value='mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt', label="Backing Vocals Model")

        separate_button = gr.Button("Separate Audio")

        with gr.Row():
            instrumental_out = gr.Audio(label="Instrumental")
            vocals_out = gr.Audio(label="Vocals")
            vocals_reverb_out = gr.Audio(label="Vocals (Reverb)")
            vocals_no_reverb_out = gr.Audio(label="Vocals (No Reverb)")
            lead_vocals_out = gr.Audio(label="Lead Vocals")
            backing_vocals_out = gr.Audio(label="Backing Vocals")

        # The output order must match separate_audio's return tuple.
        separate_button.click(
            separate_audio,
            inputs=[input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc],
            outputs=[instrumental_out, vocals_out, vocals_reverb_out, vocals_no_reverb_out, lead_vocals_out, backing_vocals_out]
        )

app.launch()
|
|