Spaces:
Running
Running
File size: 8,596 Bytes
20d05ae 925d97e 20d05ae 925d97e 0cb3834 925d97e eb3ba2e 925d97e 42cf67e 925d97e f98d769 925d97e eb3ba2e 925d97e 20d05ae 0cb3834 37a0b8f 0cb3834 eb3ba2e 6f1ebe2 5837809 895578d f98d769 6f1ebe2 eb3ba2e f98d769 eb3ba2e 6f1ebe2 f98d769 6f1ebe2 f98d769 925d97e 6f1ebe2 0cb3834 eb3ba2e 20d05ae eb3ba2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
import gradio as gr
import os
from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
import platform
from models.model import *
from tts.conversion import COQUI_LANGUAGES
import pytube
import os
import traceback
from pydub import AudioSegment
# from audio_enhance.functions import audio_enhance
def convert_yt_to_wav(url):
if not url:
return "Primero introduce el enlace del video", None
try:
print(f"Convirtiendo video {url}...")
# Descargar el video utilizando pytube
video = pytube.YouTube(url)
stream = video.streams.filter(only_audio=True).first()
video_output_folder = os.path.join(f"yt_videos") # Ruta de destino de la carpeta
audio_output_folder = 'audios'
print("Downloading video")
video_file_path = stream.download(output_path=video_output_folder)
print(video_file_path)
file_name = os.path.basename(video_file_path)
audio_file_path = os.path.join(audio_output_folder, file_name.replace('.mp4','.wav'))
# convert mp4 to wav
print("Converting to wav")
sound = AudioSegment.from_file(video_file_path,format="mp4")
sound.export(audio_file_path, format="wav")
if os.path.exists(video_file_path):
os.remove(video_file_path)
return "Success", audio_file_path
except ConnectionResetError as cre:
return "Se ha perdido la conexión, recarga o reintentalo nuevamente más tarde.", None
except Exception as e:
return str(e), None
with gr.Blocks() as app:
gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
with gr.Tab("Inferencia"):
model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo", show_label=True)
audio_path = gr.Audio(label="Archivo de audio", show_label=True, type="filepath", )
f0_method = gr.Dropdown(choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"],
value="rmvpe",
label="Algoritmo", show_label=True)
# Salida
with gr.Row():
vc_output1 = gr.Textbox(label="Salida")
vc_output2 = gr.Audio(label="Audio de salida")
btn = gr.Button(value="Convertir")
btn.click(infer, inputs=[model_url, f0_method, audio_path], outputs=[vc_output1, vc_output2])
with gr.TabItem("TTS"):
with gr.Row():
tts_text = gr.Textbox(
label="Texto:",
placeholder="Texto que deseas convertir a voz...",
lines=6,
)
with gr.Column():
with gr.Row():
tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
with gr.Row():
tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="Método TTS:", visible=True)
tts_model = gr.Dropdown(choices=EDGE_VOICES, label="Modelo TTS:", visible=True, interactive=True)
tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True, visible=False)
tts_coqui_languages = gr.Radio(
label="Language",
choices=COQUI_LANGUAGES,
value="en",
visible=False
)
tts_btn = gr.Button(value="Convertir")
with gr.Row():
tts_vc_output1 = gr.Textbox(label="Salida")
tts_vc_output2 = gr.Audio(label="Audio de salida")
tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages], outputs=[tts_vc_output1, tts_vc_output2])
tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
![Imgur](https://imgur.com/HH6YTu0.png)
""", visible=False)
tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])
with gr.TabItem("Youtube"):
gr.Markdown("## Convertir video de Youtube a audio")
with gr.Row():
yt_url = gr.Textbox(
label="Url del video:",
placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q"
)
yt_btn = gr.Button(value="Convertir")
with gr.Row():
yt_output1 = gr.Textbox(label="Salida")
yt_output2 = gr.Audio(label="Audio de salida")
yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])
# with gr.TabItem("Mejora de audio"):
# enhance_input_audio = gr.Audio(label="Audio de entrada")
# enhance_output_audio = gr.Audio(label="Audio de salida")
# btn_enhance_audio = gr.Button()
# # btn_enhance_audio.click(fn=audio_enhance, inputs=[enhance_input_audio], outputs=[enhance_output_audio])
with gr.Tab("Modelos"):
gr.HTML("<h4>Buscar modelos</h4>")
search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
# Salida
with gr.Row():
sarch_output = gr.Markdown(label="Salida")
btn_search_model = gr.Button(value="Buscar")
btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[sarch_output])
gr.HTML("<h4>Publica tu modelo</h4>")
post_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
post_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo", show_label=True)
post_creator = gr.Textbox(placeholder="ID de discord o enlace al perfil del creador", label="Creador", show_label=True)
post_version = gr.Dropdown(choices=["RVC v1", "RVC v2"], value="RVC v1", label="Versión", show_label=True)
# Salida
with gr.Row():
post_output = gr.Markdown(label="Salida")
btn_post_model = gr.Button(value="Publicar")
btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
# with gr.Column():
# model_voice_path07 = gr.Dropdown(
# label=i18n("RVC Model:"),
# choices=sorted(names),
# value=default_weight,
# )
# best_match_index_path1, _ = match_index(
# model_voice_path07.value
# )
# file_index2_07 = gr.Dropdown(
# label=i18n("Select the .index file:"),
# choices=get_indexes(),
# value=best_match_index_path1,
# interactive=True,
# allow_custom_value=True,
# )
# with gr.Row():
# refresh_button_ = gr.Button(i18n("Refresh"), variant="primary")
# refresh_button_.click(
# fn=change_choices2,
# inputs=[],
# outputs=[model_voice_path07, file_index2_07],
# )
# with gr.Row():
# original_ttsvoice = gr.Audio(label=i18n("Audio TTS:"))
# ttsvoice = gr.Audio(label=i18n("Audio RVC:"))
# with gr.Row():
# button_test = gr.Button(i18n("Convert"), variant="primary")
# button_test.click(
# tts.use_tts,
# inputs=[
# text_test,
# tts_test,
# model_voice_path07,
# file_index2_07,
# # transpose_test,
# vc_transform0,
# f0method8,
# index_rate1,
# crepe_hop_length,
# f0_autotune,
# ttsmethod_test,
# ],
# outputs=[ttsvoice, original_ttsvoice],
# )
app.queue(concurrency_count=511, max_size=1022).launch()
#share=True |