xJuuzouYTx committed
Commit f98d769
1 Parent(s): 5837809

[ADD] coquitts

Files changed (5):
  1. app.py +41 -9
  2. packages.txt +2 -0
  3. requirements.txt +4 -1
  4. tts/constants.py +1 -1
  5. tts/conversion.py +19 -62
app.py CHANGED
@@ -6,8 +6,9 @@ import hashlib
 from utils.model import model_downloader, get_model
 import requests
 import json
+import torch
 from tts.constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
-from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES
+from tts.conversion import tts_infer, ELEVENLABS_VOICES_RAW, ELEVENLABS_VOICES_NAMES, COQUI_LANGUAGES
 
 api_url = "https://rvc-models-api.onrender.com/uploadfile/"
 
@@ -18,6 +19,17 @@ if not os.path.exists(zips_folder):
 if not os.path.exists(unzips_folder):
     os.mkdir(unzips_folder)
 
+def get_info(path):
+    path = os.path.join(unzips_folder, path)
+    try:
+        a = torch.load(path, map_location="cpu")
+        return a
+    except Exception as e:
+        print("*****************eeeeeeeeeeeeeeeeeeeerrrrrrrrrrrrrrrrrr*****")
+        print(e)
+        return {
+
+        }
 def calculate_md5(file_path):
     hash_md5 = hashlib.md5()
     with open(file_path, "rb") as f:
@@ -82,16 +94,26 @@ def post_model(name, model_url, version, creator):
 
     md5_hash = calculate_md5(os.path.join(unzips_folder,model_files['pth']))
     zipfile = compress(modelname, list(model_files.values()))
+
+    a = get_info(model_files.get('pth'))
     file_to_upload = open(zipfile, "rb")
+    info = a.get("info", "None"),
+    sr = a.get("sr", "None"),
+    f0 = a.get("f0", "None"),
+
     data = {
         "name": name,
         "version": version,
         "creator": creator,
-        "hash": md5_hash
+        "hash": md5_hash,
+        "info": info,
+        "sr": sr,
+        "f0": f0
     }
     print("Subiendo archivo...")
     # Realizar la solicitud POST
     response = requests.post(api_url, files={"file": file_to_upload}, data=data)
+    result = response.json()
 
     # Comprobar la respuesta
     if response.status_code == 200:
@@ -100,6 +122,7 @@ def post_model(name, model_url, version, creator):
     else:
         print("Error al cargar el archivo:", response.status_code)
     return result
+
 
 def search_model(name):
     web_service_url = "https://script.google.com/macros/s/AKfycbyRaNxtcuN8CxUrcA_nHW6Sq9G2QJor8Z2-BJUGnQ2F_CB8klF4kQL--U2r2MhLFZ5J/exec"
@@ -130,11 +153,13 @@ def search_model(name):
 
 def update_tts_methods_voice(select_value):
     if select_value == "Edge-tts":
-        return gr.update(choices=EDGE_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
+        return gr.update(choices=EDGE_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False),gr.Radio.update(visible=False)
     elif select_value == "Bark-tts":
-        return gr.update(choices=BARK_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False)
+        return gr.update(choices=BARK_VOICES), gr.Markdown.update(visible=False), gr.Textbox.update(visible=False),gr.Radio.update(visible=False)
     elif select_value == 'ElevenLabs':
-        return gr.update(choices=ELEVENLABS_VOICES_NAMES), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True)
+        return gr.update(choices=ELEVENLABS_VOICES_NAMES), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True), gr.Radio.update(visible=False)
+    elif select_value == 'CoquiTTS':
+        return gr.Dropdown(visible=False), gr.Markdown.update(visible=True), gr.Textbox.update(visible=True), gr.Radio.update(visible=False)
 
 with gr.Blocks() as app:
     gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
@@ -168,7 +193,14 @@ with gr.Blocks() as app:
         with gr.Row():
             tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="Método TTS:", visible=True)
             tts_model = gr.Dropdown(choices=ELEVENLABS_VOICES_NAMES, label="Modelo TTS:", visible=True, interactive=True)
-            tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True)
+            tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True, visible=False)
+
+            tts_coqui_languages = gr.Radio(
+                label="Language",
+                choices=COQUI_LANGUAGES,
+                value="en",
+                visible=False
+            )
 
         tts_btn = gr.Button(value="Convertir")
 
@@ -176,13 +208,13 @@ with gr.Blocks() as app:
             tts_vc_output1 = gr.Textbox(label="Salida")
             tts_vc_output2 = gr.Audio(label="Audio de salida")
 
-        tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key], outputs=[tts_vc_output1, tts_vc_output2])
+        tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages], outputs=[tts_vc_output1, tts_vc_output2])
 
         tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
                               ![Imgur](https://imgur.com/HH6YTu0.png)
-                              """, visible=True)
+                              """, visible=False)
 
-        tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key])
+        tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])
 
     with gr.Tab("Modelos"):
         gr.HTML("<h4>Buscar modelos</h4>")
packages.txt ADDED
@@ -0,0 +1,2 @@
+libsndfile1
+espeak-ng
requirements.txt CHANGED
@@ -169,4 +169,7 @@ firebase_admin
 nltk
 gdown
 validators
-git+https://github.com/suno-ai/bark.git
+#git+https://github.com/suno-ai/bark.git
+#tortoise-tts
+#git+https://github.com/neonbjb/tortoise-tts.git
+neon-tts-plugin-coqui==0.7.3a1
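The pinned `neon-tts-plugin-coqui` package is what provides the `CoquiTTS` class that `tts/conversion.py` now imports. A minimal, hedged sanity check — the snippet assumes only the attributes used in the diff (`CoquiTTS.langs` and the `"en"` default from `app.py`):

```python
# Quick check that the pinned plugin exposes the language table the app relies on.
from neon_tts_plugin_coqui import CoquiTTS

languages = list(CoquiTTS.langs.keys())  # becomes COQUI_LANGUAGES in tts/conversion.py
print(languages)
assert "en" in languages  # "en" is the default value of the new Gradio Radio in app.py
```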
tts/constants.py CHANGED
@@ -1,4 +1,4 @@
-VOICE_METHODS = ["Edge-tts", "ElevenLabs",]
+VOICE_METHODS = ["Edge-tts", "CoquiTTS", "ElevenLabs",]
 
 BARK_VOICES = [
     "v2/en_speaker_0-Male",
tts/conversion.py CHANGED
@@ -9,7 +9,10 @@ from inference import Inference
 import asyncio
 from elevenlabs import voices, generate, save
 from elevenlabs.api.error import UnauthenticatedRateLimitError
+from neon_tts_plugin_coqui import CoquiTTS
+import tempfile
 
+# Elevenlabs
 ELEVENLABS_VOICES_RAW = voices()
 
 def get_elevenlabs_voice_names():
@@ -20,50 +23,11 @@ def get_elevenlabs_voice_names():
 
 ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()
 
-#git+https://github.com/suno-ai/bark.git
-# from transformers import AutoProcessor, BarkModel
-# import nltk
-# from nltk.tokenize import sent_tokenize
-# from bark import SAMPLE_RATE
+# CoquiTTS
+COQUI_LANGUAGES = list(CoquiTTS.langs.keys())
+coquiTTS = CoquiTTS()
 
-# now_dir = os.getcwd()
-
-def cast_to_device(tensor, device):
-    try:
-        return tensor.to(device)
-    except Exception as e:
-        print(e)
-        return tensor
-
-# Buscar la forma de evitar descargar el archivo de 4gb cada vez que crea una instancia
-# def _bark_conversion_(text, voice_preset):
-#     os.makedirs(os.path.join(now_dir, "tts"), exist_ok=True)
-
-#     device = "cuda:0" if torch.cuda.is_available() else "cpu"
-#     dtype = torch.float32 if "cpu" in device else torch.float16
-#     bark_processor = AutoProcessor.from_pretrained(
-#         "suno/bark",
-#         cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
-#         torch_dtype=dtype,
-#     )
-#     bark_model = BarkModel.from_pretrained(
-#         "suno/bark",
-#         cache_dir=os.path.join(now_dir, "tts", "suno/bark"),
-#         torch_dtype=dtype,
-#     ).to(device)
-#     # bark_model.enable_cpu_offload()
-#     inputs = bark_processor(text=[text], return_tensors="pt", voice_preset=voice_preset)
-#     tensor_dict = {
-#         k: cast_to_device(v, device) if hasattr(v, "to") else v
-#         for k, v in inputs.items()
-#     }
-#     speech_values = bark_model.generate(**tensor_dict, do_sample=True)
-#     sampling_rate = bark_model.generation_config.sample_rate
-#     speech = speech_values.cpu().numpy().squeeze()
-#     return speech, sampling_rate
-
-
-def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
+def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key, language):
     if not tts_text:
         return 'Primero escribe el texto que quieres convertir.', None
     if not tts_model:
@@ -79,8 +43,8 @@ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
         tts_text = tts_text[:60]
         print("DEMO; limit to 60 characters")
 
-    language = tts_model[:2]
     if tts_method == "Edge-tts":
+        language = tts_model[:2]
         try:
             asyncio.run(
                 edge_tts.Communicate(
@@ -102,6 +66,17 @@ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
             tts.save(converted_tts_filename)
             print("Error: Audio will be replaced.")
             success = False
+
+    # if tts_method == "Tortoise":
+    #     api.TextToSpeech()
+
+    if tts_method == "CoquiTTS":
+        print(tts_text, language)
+        # return output
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+            coquiTTS.get_tts(tts_text, fp, speaker = {"language" : language})
+            return fp.name
+
     if tts_method == 'ElevenLabs':
         try:
             audio = generate(
@@ -117,25 +92,7 @@ def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key):
 
     if not model_url:
         return 'Pon la url del modelo si quieres aplicarle otro tono.', converted_tts_filename
-
-    # elif tts_method == "Bark-tts":
-    #     try:
-    #         script = tts_text.replace("\n", " ").strip()
-    #         sentences = sent_tokenize(script)
-    #         silence = np.zeros(int(0.25 * SAMPLE_RATE))
-    #         pieces = []
-    #         for sentence in sentences:
-    #             audio_array, _ = _bark_conversion_(sentence, tts_model.split("-")[0])
-    #             pieces += [audio_array, silence.copy()]
 
-    #         sf.write(
-    #             file=converted_tts_filename, samplerate=SAMPLE_RATE, data=np.concatenate(pieces)
-    #         )
-
-    #     except Exception as e:
-    #         print(f"{e}")
-    #         return None, None
-
     if success:
         inference = Inference(
             model_name=model_url,
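For orientation, a standalone sketch of the new CoquiTTS branch in `tts_infer`, using only the calls that appear in the diff (`CoquiTTS()`, `CoquiTTS.langs`, and `get_tts(text, file, speaker={"language": ...})`); the wrapper name and the validation step are illustrative additions:

```python
import tempfile
from neon_tts_plugin_coqui import CoquiTTS

COQUI_LANGUAGES = list(CoquiTTS.langs.keys())
coqui_tts = CoquiTTS()

def synthesize_with_coqui(text: str, language: str = "en") -> str:
    """Render `text` to a temporary .wav with the Coqui plugin and return the file path."""
    if language not in COQUI_LANGUAGES:
        raise ValueError(f"Unsupported CoquiTTS language: {language}")
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coqui_tts.get_tts(text, fp, speaker={"language": language})
        return fp.name
```

Unlike the Edge-tts and ElevenLabs branches, the committed CoquiTTS branch returns `fp.name` directly, so it exits before the RVC voice-conversion step guarded by `if success:` further down in `tts_infer`.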