SimpleRVC

Runtime error

App Files Files Community

xJuuzouYTx committed on Nov 1, 2023

Commit

f98d769

•

1 Parent(s): 5837809

[ADD] coquitts

Browse files

Files changed (5) hide show

app.py +41 -9
packages.txt +2 -0
requirements.txt +4 -1
tts/constants.py +1 -1
tts/conversion.py +19 -62

app.py CHANGED Viewed

@@ -6,8 +6,9 @@ import hashlib
 from utils.model import model_downloader, get_model
 import requests
 import json
 from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
-from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES
 api_url = "https://rvc-models-api.onrender.com/uploadfile/"
@@ -18,6 +19,17 @@ if not os.path.exists(zips_folder):
 if not os.path.exists(unzips_folder):
   os.mkdir(unzips_folder)
 def calculate_md5(file_path):
     hash_md5 = hashlib.md5()
     with open(file_path, "rb") as f:
@@ -82,16 +94,26 @@ def post_model(name, model_url, version, creator):
     md5_hash = calculate_md5(os.path.join(unzips_folder,model_files['pth']))
     zipfile = compress(modelname, list(model_files.values()))
     file_to_upload = open(zipfile, "rb")
     data = {
         "name": name,
         "version": version,
         "creator": creator,
-        "hash": md5_hash
     }
     print("Subiendo archivo...")
     # Realizar la solicitud POST
     response = requests.post(api_url, files={"file": file_to_upload}, data=data)
     # Comprobar la respuesta
     if response.status_code == 200:
@@ -100,6 +122,7 @@ def post_model(name, model_url, version, creator):
     else:
         print("Error al cargar el archivo:", response.status_code)
         return result
 def search_model(name):
     web_service_url = "https://script.google.com/macros/s/AKfycbyRaNxtcuN8CxUrcA_nHW6Sq9G2QJor8Z2-BJUGnQ2F_CB8klF4kQL--U2r2MhLFZ5J/exec"
@@ -130,11 +153,13 @@ def search_model(name):
 def update_tts_methods_voice(select_value):
     if select_value == "Edge-tts":
-        return gr.update(choices=EDGE_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
     elif select_value == "Bark-tts":
-        return gr.update(choices=BARK_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
     elif select_value == 'ElevenLabs':
-        return gr.update(choices=ELEVENLABS_VOICES_NAMES), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True)
 with gr.Blocks() as app:
     gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
@@ -168,7 +193,14 @@ with gr.Blocks() as app:
             with gr.Row():
                 tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="Método TTS:", visible=True)
                 tts_model = gr.Dropdown(choices=ELEVENLABS_VOICES_NAMES, label="Modelo TTS:", visible=True, interactive=True)
-                tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True)
             tts_btn = gr.Button(value="Convertir")
@@ -176,13 +208,13 @@ with gr.Blocks() as app:
                 tts_vc_output1 = gr.Textbox(label="Salida")
                 tts_vc_output2 = gr.Audio(label="Audio de salida")
-        tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key], outputs=[tts_vc_output1, tts_vc_output2])
         tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
                 ![Imgur](https://imgur.com/HH6YTu0.png)
-                """, visible=True)
-        tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key])
     with gr.Tab("Modelos"):
         gr.HTML("<h4>Buscar modelos</h4>")

 from utils.model import model_downloader, get_model
 import requests
 import json
+import torch
 from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
+from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES, COQUI_LANGUAGES
 api_url = "https://rvc-models-api.onrender.com/uploadfile/"
 if not os.path.exists(unzips_folder):
   os.mkdir(unzips_folder)
def get_info(path):
    """Load a model checkpoint from ``unzips_folder`` and return its contents.

    Args:
        path: File name of the checkpoint, relative to ``unzips_folder``.
            May be ``None`` or empty when no ``.pth`` entry was found.

    Returns:
        The dict produced by ``torch.load``, or an empty dict when the path
        is missing, the file cannot be loaded, or the loaded object is not a
        dict. The caller only reads it through ``.get(...)``.
    """
    # Guard first: os.path.join(folder, None) raises TypeError, and in the
    # original that happened outside the try block.
    if not path:
        return {}
    full_path = os.path.join(unzips_folder, path)
    try:
        # SECURITY(review): torch.load unpickles the file; this checkpoint
        # comes from a downloaded archive, so treat it as untrusted input.
        checkpoint = torch.load(full_path, map_location="cpu")
    except Exception as e:
        # Best-effort: metadata is optional, so report and fall back to {}.
        print("*****************eeeeeeeeeeeeeeeeeeeerrrrrrrrrrrrrrrrrr*****")
        print(e)
        return {}
    # Anything that is not a dict would break the caller's .get() lookups.
    return checkpoint if isinstance(checkpoint, dict) else {}
 def calculate_md5(file_path):
     hash_md5 = hashlib.md5()
     with open(file_path, "rb") as f:
     md5_hash = calculate_md5(os.path.join(unzips_folder,model_files['pth']))
     zipfile = compress(modelname, list(model_files.values()))
+    a = get_info(model_files.get('pth'))
     file_to_upload = open(zipfile, "rb")
+    info = a.get("info", "None"),
+    sr = a.get("sr", "None"),
+    f0 = a.get("f0", "None"),
     data = {
         "name": name,
         "version": version,
         "creator": creator,
+        "hash": md5_hash,
+        "info": info,
+        "sr": sr,
+        "f0": f0
     }
     print("Subiendo archivo...")
     # Realizar la solicitud POST
     response = requests.post(api_url, files={"file": file_to_upload}, data=data)
+    result = response.json()
     # Comprobar la respuesta
     if response.status_code == 200:
     else:
         print("Error al cargar el archivo:", response.status_code)
         return result
 def search_model(name):
     web_service_url = "https://script.google.com/macros/s/AKfycbyRaNxtcuN8CxUrcA_nHW6Sq9G2QJor8Z2-BJUGnQ2F_CB8klF4kQL--U2r2MhLFZ5J/exec"
 def update_tts_methods_voice(select_value):
     """Return UI updates for the TTS controls when the TTS method changes.

     The four returned updates map, in order, to the outputs wired in
     ``tts_method.change``: the voice dropdown, the ElevenLabs markdown note,
     the ElevenLabs API-key textbox and the Coqui language radio.

     Args:
         select_value: The TTS method chosen in the ``tts_method`` dropdown.

     Returns:
         A 4-tuple of ``gr.update(...)`` objects.
     """
     if select_value == "Edge-tts":
         # visible=True restores the dropdown after CoquiTTS has hidden it.
         return (
             gr.update(choices=EDGE_VOICES, visible=True),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
         )
     if select_value == "Bark-tts":
         return (
             gr.update(choices=BARK_VOICES, visible=True),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
         )
     if select_value == 'ElevenLabs':
         return (
             gr.update(choices=ELEVENLABS_VOICES_NAMES, visible=True),
             gr.update(visible=True),
             gr.update(visible=True),
             gr.update(visible=False),
         )
     if select_value == 'CoquiTTS':
         # CoquiTTS is driven by the language radio; the ElevenLabs note and
         # API-key box do not apply, so hide them and show the radio instead.
         # NOTE(review): the voice dropdown stays hidden as in the original;
         # confirm tts_infer does not require a non-empty tts_model here.
         return (
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=True),
         )
     # Unknown method: leave all four components unchanged rather than
     # returning None for four outputs.
     return gr.update(), gr.update(), gr.update(), gr.update()
 with gr.Blocks() as app:
     gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
             with gr.Row():
                 tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="Método TTS:", visible=True)
                 tts_model = gr.Dropdown(choices=ELEVENLABS_VOICES_NAMES, label="Modelo TTS:", visible=True, interactive=True)
+                tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True, visible=False)
+            tts_coqui_languages = gr.Radio(
+                label="Language",
+                choices=COQUI_LANGUAGES,
+                value="en",
+                visible=False
+            )
             tts_btn = gr.Button(value="Convertir")
                 tts_vc_output1 = gr.Textbox(label="Salida")
                 tts_vc_output2 = gr.Audio(label="Audio de salida")
+        tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages], outputs=[tts_vc_output1, tts_vc_output2])
         tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
                 ![Imgur](https://imgur.com/HH6YTu0.png)
+                """, visible=False)
+        tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])
     with gr.Tab("Modelos"):
         gr.HTML("<h4>Buscar modelos</h4>")

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ libsndfile1
2	+ espeak-ng

requirements.txt CHANGED Viewed

@@ -169,4 +169,7 @@ firebase_admin
 nltk
 gdown
 validators
-git+https://github.com/suno-ai/bark.git

 nltk
 gdown
 validators
+#git+https://github.com/suno-ai/bark.git
+#tortoise-tts
+#git+https://github.com/neonbjb/tortoise-tts.git
+neon-tts-plugin-coqui==0.7.3a1

tts/constants.py CHANGED Viewed

@@ -1,4 +1,4 @@
-VOICE_METHODS = ["Edge-tts", "ElevenLabs",]
 BARK_VOICES = [
     "v2/en_speaker_0-Male",

+VOICE_METHODS = ["Edge-tts", "CoquiTTS", "ElevenLabs",]
 BARK_VOICES = [
     "v2/en_speaker_0-Male",

tts/conversion.py CHANGED Viewed

@@ -9,7 +9,10 @@ from inference import Inference
 import asyncio
 from elevenlabs import voices, generate, save
 from elevenlabs.api.error import UnauthenticatedRateLimitError
 ELEVENLABS_VOICES_RAW = voices()
 def get_elevenlabs_voice_names():
@@ -20,50 +23,11 @@ def get_elevenlabs_voice_names():
 ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
-#git+https://github.com/suno-ai/bark.git
-# from transformers import AutoProcessor, BarkModel
-# import nltk
-# from nltk.tokenize import sent_tokenize
-# from bark import SAMPLE_RATE
-# now_dir = os.getcwd()
-def cast_to_device(tensor, device):
-    try:
-        return tensor.to(device)
-    except Exception as e:
-        print(e)
-        return tensor
-# Buscar la forma de evitar descargar el archivo de 4gb cada vez que crea una instancia
-# def _bark_conversion_(text, voice_preset):
-#     os.makedirs(os.path.join(now_dir, "tts"), exist_ok=True)
-#     device = "cuda:0" if torch.cuda.is_available() else "cpu"
-#     dtype = torch.float32 if "cpu" in device else torch.float16
-#     bark_processor = AutoProcessor.from_pretrained(
-#         "suno/bark",
-#         cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
-#         torch_dtype=dtype,
-#     )
-#     bark_model = BarkModel.from_pretrained(
-#         "suno/bark",
-#         cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
-#         torch_dtype=dtype,
-#     ).to(device)
-#     # bark_model.enable_cpu_offload()
-#     inputs = bark_processor(text=[text], return_tensors="pt", voice_preset=voice_preset)
-#     tensor_dict = {
-#         k: cast_to_device(v, device) if hasattr(v, "to") else v
-#         for k, v in inputs.items()
-#     }
-#     speech_values = bark_model.generate(**tensor_dict, do_sample=True)
-#     sampling_rate = bark_model.generation_config.sample_rate
-#     speech = speech_values.cpu().numpy().squeeze()
-#     return speech, sampling_rate
-def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
     if not tts_text:
         return 'Primero escribe el texto que quieres convertir.', None
     if not tts_model:
@@ -79,8 +43,8 @@ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
         tts_text = tts_text[:60]
         print("DEMO; limit to 60 characters")
-    language = tts_model[:2]
     if tts_method == "Edge-tts":
         try:
             asyncio.run(
                 edge_tts.Communicate(
@@ -102,6 +66,17 @@ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
                 tts.save(converted_tts_filename)
                 print("Error: Audio will be replaced.")
                 success = False
     if tts_method == 'ElevenLabs':
         try:
             audio = generate(
@@ -117,25 +92,7 @@ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
     if not model_url:
         return 'Pon la url del modelo si quieres aplicarle otro tono.', converted_tts_filename
-    # elif tts_method == "Bark-tts":
-    #     try:
-    #         script = tts_text.replace("\n", " ").strip()
-    #         sentences = sent_tokenize(script)
-    #         silence = np.zeros(int(0.25 * SAMPLE_RATE))
-    #         pieces = []
-    #         for sentence in sentences:
-    #             audio_array, _ = _bark_conversion_(sentence, tts_model.split("-")[0])
-    #             pieces += [audio_array, silence.copy()]
-    #         sf.write(
-    #             file=converted_tts_filename, samplerate=SAMPLE_RATE, data=np.concatenate(pieces)
-    #         )
-    #     except Exception as e:
-    #         print(f"{e}")
-    #         return None, None
     if success:
         inference = Inference(
             model_name=model_url,

 import asyncio
 from elevenlabs import voices, generate, save
 from elevenlabs.api.error import UnauthenticatedRateLimitError
+from neon_tts_plugin_coqui import CoquiTTS
+import tempfile
+# Elevenlabs
 ELEVENLABS_VOICES_RAW = voices()
 def get_elevenlabs_voice_names():
 ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
+# CoquiTTS
+COQUI_LANGUAGES = list(CoquiTTS.langs.keys())
+coquiTTS = CoquiTTS()
+def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key, language):
     if not tts_text:
         return 'Primero escribe el texto que quieres convertir.', None
     if not tts_model:
         tts_text = tts_text[:60]
         print("DEMO; limit to 60 characters")
     if tts_method == "Edge-tts":
+        language = tts_model[:2]
         try:
             asyncio.run(
                 edge_tts.Communicate(
                 tts.save(converted_tts_filename)
                 print("Error: Audio will be replaced.")
                 success = False
+    # if tts_method == "Tortoise":
+    #     api.TextToSpeech()
+    if tts_method == "CoquiTTS":
+        print(tts_text, language)
+        # return output
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+            coquiTTS.get_tts(tts_text, fp, speaker = {"language" : language})
+            return fp.name
     if tts_method == 'ElevenLabs':
         try:
             audio = generate(
     if not model_url:
         return 'Pon la url del modelo si quieres aplicarle otro tono.', converted_tts_filename
     if success:
         inference = Inference(
             model_name=model_url,