talktalkai-models

Running

App Files Files Community

xJuuzouYTx commited on Nov 1, 2023

Commit

925d97e

•

1 Parent(s): 97fc03f

[ADD] youtube video download as wav

Browse files

Files changed (8) hide show

.gitignore +1 -0
app.py +62 -157
audio_enhance/functions.py +24 -0
tts/constants.py → constants.py +3 -0
models/model.py +160 -0
requirements.txt +6 -1
tts/conversion.py +13 -6
tts/test.py +1 -0

.gitignore CHANGED Viewed

@@ -4,6 +4,7 @@ __pycache__
 /audios/
 /audio-outputs/
 /LOGS
 /RUNTIME
 *.pyd
 hubert_base.pt

 /audios/
 /audio-outputs/
 /LOGS
+/yt_videos
 /RUNTIME
 *.pyd
 hubert_base.pt

app.py CHANGED Viewed

@@ -1,166 +1,48 @@
 import gradio as gr
-from inference import Inference
 import os
-import zipfile
-import hashlib
-from utils.model import model_downloader, get_model
-import requests
-import json
-import torch
-from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
-from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES, COQUI_LANGUAGES
-api_url = "https://rvc-models-api.onrender.com/uploadfile/"
-zips_folder = "./zips"
-unzips_folder = "./unzips"
-if not os.path.exists(zips_folder):
-      os.mkdir(zips_folder)
-if not os.path.exists(unzips_folder):
-  os.mkdir(unzips_folder)
-def get_info(path):
-    path = os.path.join(unzips_folder, path)
-    try:
-        a = torch.load(path, map_location="cpu")
-        return a
-    except Exception as e:
-        print("*****************eeeeeeeeeeeeeeeeeeeerrrrrrrrrrrrrrrrrr*****")
-        print(e)
-        return {
-        }
-def calculate_md5(file_path):
-    hash_md5 = hashlib.md5()
-    with open(file_path, "rb") as f:
-        for chunk in iter(lambda: f.read(4096), b""):
-            hash_md5.update(chunk)
-    return hash_md5.hexdigest()
-def compress(modelname, files):
-    file_path = os.path.join(zips_folder, f"{modelname}.zip")
-    # Select the compression mode ZIP_DEFLATED for compression
-    # or zipfile.ZIP_STORED to just store the file
-    compression = zipfile.ZIP_DEFLATED
-    # Comprueba si el archivo ZIP ya existe
-    if not os.path.exists(file_path):
-        # Si no existe, crea el archivo ZIP
-        with zipfile.ZipFile(file_path, mode="w") as zf:
-            try:
-                for file in files:
-                    if file:
-                        # Agrega el archivo al archivo ZIP
-                        zf.write(unzips_folder if ".index" in file else os.path.join(unzips_folder, file), compress_type=compression)
-            except FileNotFoundError as fnf:
-                print("An error occurred", fnf)
-    else:
-        # Si el archivo ZIP ya existe, agrega los archivos a un archivo ZIP existente
-        with zipfile.ZipFile(file_path, mode="a") as zf:
-            try:
-                for file in files:
-                    if file:
-                        # Agrega el archivo al archivo ZIP
-                         zf.write(unzips_folder if ".index" in file else os.path.join(unzips_folder, file), compress_type=compression)
-            except FileNotFoundError as fnf:
-                print("An error occurred", fnf)
-    return file_path
-def infer(model, f0_method, audio_file):
-    print("****", audio_file)
-    inference = Inference(
-        model_name=model,
-        f0_method=f0_method,
-        source_audio_path=audio_file,
-        output_file_name=os.path.join("./audio-outputs", os.path.basename(audio_file))
-    )
-    output = inference.run()
-    if 'success' in output and output['success']:
-        return output, output['file']
-    else:
-        return
-def post_model(name, model_url, version, creator):
-    modelname = model_downloader(model_url, zips_folder, unzips_folder)
-    model_files = get_model(unzips_folder, modelname)
-    if not model_files:
-        return "No se encontrado un modelo valido, verifica el contenido del enlace e intentalo más tarde."
-    if not model_files.get('pth'):
-        return "No se encontrado un modelo valido, verifica el contenido del enlace e intentalo más tarde."
-    md5_hash = calculate_md5(os.path.join(unzips_folder,model_files['pth']))
-    zipfile = compress(modelname, list(model_files.values()))
-    a = get_info(model_files.get('pth'))
-    file_to_upload = open(zipfile, "rb")
-    info = a.get("info", "None"),
-    sr = a.get("sr", "None"),
-    f0 = a.get("f0", "None"),
-    data = {
-        "name": name,
-        "version": version,
-        "creator": creator,
-        "hash": md5_hash,
-        "info": info,
-        "sr": sr,
-        "f0": f0
-    }
-    print("Subiendo archivo...")
-    # Realizar la solicitud POST
-    response = requests.post(api_url, files={"file": file_to_upload}, data=data)
-    result = response.json()
-    # Comprobar la respuesta
-    if response.status_code == 200:
-        result = response.json()
-        return json.dumps(result, indent=4)
-    else:
-        print("Error al cargar el archivo:", response.status_code)
-        return result
-def search_model(name):
-    web_service_url = "https://script.google.com/macros/s/AKfycbyRaNxtcuN8CxUrcA_nHW6Sq9G2QJor8Z2-BJUGnQ2F_CB8klF4kQL--U2r2MhLFZ5J/exec"
-    response = requests.post(web_service_url, json={
-        'type': 'search_by_filename',
-        'name': name
-    })
-    result = []
-    response.raise_for_status()  # Lanza una excepción en caso de error
-    json_response = response.json()
-    cont = 0
-    result.append("""| Nombre del modelo | Url | Epoch | Sample Rate |
-                  | ---------------- | -------------- |:------:|:-----------:|
-                  """)
-    yield "<br />".join(result)
-    if json_response.get('ok', None):
-        for model in json_response['ocurrences']:
-            if cont < 20:
-                model_name = str(model.get('name', 'N/A')).strip()
-                model_url = model.get('url', 'N/A')
-                epoch = model.get('epoch', 'N/A')
-                sr = model.get('sr', 'N/A')
-                line = f"""|{model_name}|<a>{model_url}</a>|{epoch}|{sr}|
-                """
-                result.append(line)
-                yield "".join(result)
-            cont += 1
-def update_tts_methods_voice(select_value):
-    if select_value == "Edge-tts":
-        return gr.Dropdown.update(choices=EDGE_VOICES, visible=True, value="es-CO-GonzaloNeural-Male"), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False),gr.Radio.update(visible=False)
-    elif select_value == "Bark-tts":
-        return gr.Dropdown.update(choices=BARK_VOICES, visible=True), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False),gr.Radio.update(visible=False)
-    elif select_value == 'ElevenLabs':
-        return gr.Dropdown.update(choices=ELEVENLABS_VOICES_NAMES, visible=True, value="Bella"), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True), gr.Radio.update(visible=False)
-    elif select_value == 'CoquiTTS':
-        return gr.Dropdown.update(visible=False), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False), gr.Radio.update(visible=True)
 with gr.Blocks() as app:
     gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
@@ -215,6 +97,29 @@ with gr.Blocks() as app:
                 """, visible=False)
         tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])
     with gr.Tab("Modelos"):
         gr.HTML("<h4>Buscar modelos</h4>")

 import gradio as gr
 import os
+from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
+import platform
+from models.model import *
+from tts.conversion import COQUI_LANGUAGES
+import pytube
+import os
+import traceback
+from pydub import AudioSegment
+# from audio_enhance.functions import audio_enhance
+def convert_yt_to_wav(url):
+    if not url:
+        return "Primero introduce el enlace del video", None
+    try:
+        print(f"Convirtiendo video {url}...")
+        # Descargar el video utilizando pytube
+        video = pytube.YouTube(url)
+        stream = video.streams.filter(only_audio=True).first()
+        video_output_folder = os.path.join(f"yt_videos")  # Ruta de destino de la carpeta
+        audio_output_folder = 'audios'
+        print("Downloading video")
+        video_file_path = stream.download(output_path=video_output_folder)
+        print(video_file_path)
+        file_name = os.path.basename(video_file_path)
+        audio_file_path = os.path.join(audio_output_folder, file_name.replace('.mp4','.wav'))
+        # convert mp4 to wav
+        print("Converting to wav")
+        sound = AudioSegment.from_file(video_file_path,format="mp4")
+        sound.export(audio_file_path, format="wav")
+        if os.path.exists(video_file_path):
+            os.remove(video_file_path)
+        return "Success", audio_file_path
+    except ConnectionResetError as cre:
+        return "Se ha perdido la conexión, recarga o reintentalo nuevamente más tarde.", None
+    except Exception as e:
+        return str(e), None
 with gr.Blocks() as app:
     gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
                 """, visible=False)
         tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])
+    with gr.TabItem("Youtube"):
+        gr.Markdown("## Convertir video de Youtube a audio")
+        with gr.Row():
+            yt_url = gr.Textbox(
+                label="Url del video:",
+                placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q"
+            )
+        yt_btn = gr.Button(value="Convertir")
+        with gr.Row():
+            yt_output1 = gr.Textbox(label="Salida")
+            yt_output2 = gr.Audio(label="Audio de salida")
+        yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])
+    # with gr.TabItem("Mejora de audio"):
+    #     enhance_input_audio = gr.Audio(label="Audio de entrada")
+    #     enhance_output_audio = gr.Audio(label="Audio de salida")
+    #     btn_enhance_audio = gr.Button()
+    #     # btn_enhance_audio.click(fn=audio_enhance, inputs=[enhance_input_audio], outputs=[enhance_output_audio])
     with gr.Tab("Modelos"):
         gr.HTML("<h4>Buscar modelos</h4>")

audio_enhance/functions.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import torchaudio
+import numpy as np
+import torchaudio.transforms as T
+from df import enhance, init_df
+df_sr = 48000
+model, df_state, _ = init_df()
+def audio_enchance(input_audio):
+    extension = input_audio.split('.')[-1]
+    if extension not in ['wav', 'mpeg', 'ogg']:
+        return "El formato del audio no es valido, usa wav, mpeg o ogg", None
+    else:
+        noisy_audio, sr = torchaudio.load(input_audio)
+        print("np.shape(noisy_audio)", np.shape(noisy_audio))
+        if sr != df_sr:
+            resampler = T.Resample(orig_freq=sr, new_freq=df_sr)
+            noisy_audio = resampler(noisy_audio)
+        output_audio = enhance(model, df_state, noisy_audio)
+        return np.shape(noisy_audio), noisy_audio

tts/constants.py → constants.py RENAMED Viewed

@@ -1,3 +1,6 @@
 VOICE_METHODS = ["Edge-tts", "CoquiTTS", "ElevenLabs",]
 BARK_VOICES = [

+zips_folder = "./zips"
+unzips_folder = "./unzips"
 VOICE_METHODS = ["Edge-tts", "CoquiTTS", "ElevenLabs",]
 BARK_VOICES = [

models/model.py ADDED Viewed

	@@ -0,0 +1,160 @@

+import zipfile
+import hashlib
+from utils.model import model_downloader, get_model
+import requests
+import json
+import torch
+import os
+from inference import Inference
+import gradio as gr
+from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES, zips_folder, unzips_folder
+from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES
+api_url = "https://rvc-models-api.onrender.com/uploadfile/"
+if not os.path.exists(zips_folder):
+      os.mkdir(zips_folder)
+if not os.path.exists(unzips_folder):
+  os.mkdir(unzips_folder)
+def get_info(path):
+    path = os.path.join(unzips_folder, path)
+    try:
+        a = torch.load(path, map_location="cpu")
+        return a
+    except Exception as e:
+        print("*****************eeeeeeeeeeeeeeeeeeeerrrrrrrrrrrrrrrrrr*****")
+        print(e)
+        return {
+        }
+def calculate_md5(file_path):
+    hash_md5 = hashlib.md5()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            hash_md5.update(chunk)
+    return hash_md5.hexdigest()
+def compress(modelname, files):
+    file_path = os.path.join(zips_folder, f"{modelname}.zip")
+    # Select the compression mode ZIP_DEFLATED for compression
+    # or zipfile.ZIP_STORED to just store the file
+    compression = zipfile.ZIP_DEFLATED
+    # Comprueba si el archivo ZIP ya existe
+    if not os.path.exists(file_path):
+        # Si no existe, crea el archivo ZIP
+        with zipfile.ZipFile(file_path, mode="w") as zf:
+            try:
+                for file in files:
+                    if file:
+                        # Agrega el archivo al archivo ZIP
+                        zf.write(unzips_folder if ".index" in file else os.path.join(unzips_folder, file), compress_type=compression)
+            except FileNotFoundError as fnf:
+                print("An error occurred", fnf)
+    else:
+        # Si el archivo ZIP ya existe, agrega los archivos a un archivo ZIP existente
+        with zipfile.ZipFile(file_path, mode="a") as zf:
+            try:
+                for file in files:
+                    if file:
+                        # Agrega el archivo al archivo ZIP
+                         zf.write(unzips_folder if ".index" in file else os.path.join(unzips_folder, file), compress_type=compression)
+            except FileNotFoundError as fnf:
+                print("An error occurred", fnf)
+    return file_path
+def infer(model, f0_method, audio_file):
+    print("****", audio_file)
+    inference = Inference(
+        model_name=model,
+        f0_method=f0_method,
+        source_audio_path=audio_file,
+        output_file_name=os.path.join("./audio-outputs", os.path.basename(audio_file))
+    )
+    output = inference.run()
+    if 'success' in output and output['success']:
+        return output, output['file']
+    else:
+        return
+def post_model(name, model_url, version, creator):
+    modelname = model_downloader(model_url, zips_folder, unzips_folder)
+    model_files = get_model(unzips_folder, modelname)
+    if not model_files:
+        return "No se encontrado un modelo valido, verifica el contenido del enlace e intentalo más tarde."
+    if not model_files.get('pth'):
+        return "No se encontrado un modelo valido, verifica el contenido del enlace e intentalo más tarde."
+    md5_hash = calculate_md5(os.path.join(unzips_folder,model_files['pth']))
+    zipfile = compress(modelname, list(model_files.values()))
+    a = get_info(model_files.get('pth'))
+    file_to_upload = open(zipfile, "rb")
+    info = a.get("info", "None"),
+    sr = a.get("sr", "None"),
+    f0 = a.get("f0", "None"),
+    data = {
+        "name": name,
+        "version": version,
+        "creator": creator,
+        "hash": md5_hash,
+        "info": info,
+        "sr": sr,
+        "f0": f0
+    }
+    print("Subiendo archivo...")
+    # Realizar la solicitud POST
+    response = requests.post(api_url, files={"file": file_to_upload}, data=data)
+    result = response.json()
+    # Comprobar la respuesta
+    if response.status_code == 200:
+        result = response.json()
+        return json.dumps(result, indent=4)
+    else:
+        print("Error al cargar el archivo:", response.status_code)
+        return result
+def search_model(name):
+    web_service_url = "https://script.google.com/macros/s/AKfycbyRaNxtcuN8CxUrcA_nHW6Sq9G2QJor8Z2-BJUGnQ2F_CB8klF4kQL--U2r2MhLFZ5J/exec"
+    response = requests.post(web_service_url, json={
+        'type': 'search_by_filename',
+        'name': name
+    })
+    result = []
+    response.raise_for_status()  # Lanza una excepción en caso de error
+    json_response = response.json()
+    cont = 0
+    result.append("""| Nombre del modelo | Url | Epoch | Sample Rate |
+                  | ---------------- | -------------- |:------:|:-----------:|
+                  """)
+    yield "<br />".join(result)
+    if json_response.get('ok', None):
+        for model in json_response['ocurrences']:
+            if cont < 20:
+                model_name = str(model.get('name', 'N/A')).strip()
+                model_url = model.get('url', 'N/A')
+                epoch = model.get('epoch', 'N/A')
+                sr = model.get('sr', 'N/A')
+                line = f"""|{model_name}|<a>{model_url}</a>|{epoch}|{sr}|
+                """
+                result.append(line)
+                yield "".join(result)
+            cont += 1
+def update_tts_methods_voice(select_value):
+    if select_value == "Edge-tts":
+        return gr.Dropdown.update(choices=EDGE_VOICES, visible=True, value="es-CO-GonzaloNeural-Male"), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False),gr.Radio.update(visible=False)
+    elif select_value == "Bark-tts":
+        return gr.Dropdown.update(choices=BARK_VOICES, visible=True), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False),gr.Radio.update(visible=False)
+    elif select_value == 'ElevenLabs':
+        return gr.Dropdown.update(choices=ELEVENLABS_VOICES_NAMES, visible=True, value="Bella"), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True), gr.Radio.update(visible=False)
+    elif select_value == 'CoquiTTS':
+        return gr.Dropdown.update(visible=False), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False), gr.Radio.update(visible=True)

requirements.txt CHANGED Viewed

@@ -172,4 +172,9 @@ validators
 #git+https://github.com/suno-ai/bark.git
 #tortoise-tts
 #git+https://github.com/neonbjb/tortoise-tts.git
-neon-tts-plugin-coqui

 #git+https://github.com/suno-ai/bark.git
 #tortoise-tts
 #git+https://github.com/neonbjb/tortoise-tts.git
+neon-tts-plugin-coqui
+deepfilternet
+librosa
+matplotlib
+maturin
+#git+https://github.com/microsoft/[email protected]

tts/conversion.py CHANGED Viewed

@@ -10,8 +10,16 @@ import asyncio
 from elevenlabs import voices, generate, save
 from elevenlabs.api.error import UnauthenticatedRateLimitError
 # Not working in windows
-from neon_tts_plugin_coqui import CoquiTTS
-import tempfile
 # Elevenlabs
 ELEVENLABS_VOICES_RAW = voices()
@@ -24,10 +32,6 @@ def get_elevenlabs_voice_names():
 ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
-# CoquiTTS
-COQUI_LANGUAGES = list(CoquiTTS.langs.keys())
-coquiTTS = CoquiTTS()
 def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key, language):
     if not tts_text:
         return 'Primero escribe el texto que quieres convertir.', None
@@ -68,6 +72,9 @@ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key, language)
     #     api.TextToSpeech()
     if tts_method == "CoquiTTS":
         print(tts_text, language)
         # return output
         coquiTTS.get_tts(tts_text, converted_tts_filename, speaker = {"language" : language})

 from elevenlabs import voices, generate, save
 from elevenlabs.api.error import UnauthenticatedRateLimitError
 # Not working in windows
+import platform
+COQUI_LANGUAGES = []
+if platform.system() != 'Windows':
+    from neon_tts_plugin_coqui import CoquiTTS
+    # CoquiTTS
+    COQUI_LANGUAGES = list(CoquiTTS.langs.keys())
+    coquiTTS = CoquiTTS()
 # Elevenlabs
 ELEVENLABS_VOICES_RAW = voices()
 ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
 def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key, language):
     if not tts_text:
         return 'Primero escribe el texto que quieres convertir.', None
     #     api.TextToSpeech()
     if tts_method == "CoquiTTS":
+        if platform.system() == 'Windows':
+            return "Funcionalidad no disponible en windows", None
         print(tts_text, language)
         # return output
         coquiTTS.get_tts(tts_text, converted_tts_filename, speaker = {"language" : language})

tts/test.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from neon_tts_plugin_coqui import CoquiTTS