Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import os
|
2 |
import re
|
3 |
import time
|
4 |
import sys
|
@@ -59,12 +59,6 @@ def predict(prompt, language, reference_audio):
|
|
59 |
top_k = config.model_args.get("top_k", 50)
|
60 |
top_p = config.model_args.get("top_p", 0.85)
|
61 |
|
62 |
-
# Parámetros agregados
|
63 |
-
num_gpt_outputs = config.model_args.get("num_gpt_outputs", 32)
|
64 |
-
gpt_cond_len = config.model_args.get("gpt_cond_len", 24)
|
65 |
-
gpt_cond_chunk_len = config.model_args.get("gpt_cond_chunk_len", 4)
|
66 |
-
max_ref_len = config.model_args.get("max_ref_len", 10)
|
67 |
-
|
68 |
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
|
69 |
audio_path=reference_audio
|
70 |
)
|
@@ -80,17 +74,14 @@ def predict(prompt, language, reference_audio):
|
|
80 |
length_penalty=length_penalty,
|
81 |
repetition_penalty=repetition_penalty,
|
82 |
top_k=top_k,
|
83 |
-
top_p=top_p
|
84 |
-
num_gpt_outputs=num_gpt_outputs, # Número de muestras del modelo
|
85 |
-
gpt_cond_len=gpt_cond_len, # Longitud del condicionamiento
|
86 |
-
gpt_cond_chunk_len=gpt_cond_chunk_len, # Tamaño del fragmento de audio
|
87 |
-
max_ref_len=max_ref_len # Máximo de segundos para condicionamiento
|
88 |
)
|
89 |
|
90 |
inference_time = time.time() - start_time
|
91 |
|
92 |
output_path = "pedro_labattaglia_TTS.wav"
|
93 |
# Guardar el audio directamente desde el output del modelo
|
|
|
94 |
wavfile.write(output_path, config.audio["output_sample_rate"], out["wav"])
|
95 |
|
96 |
audio_length = len(out["wav"]) / config.audio["output_sample_rate"] # duración del audio en segundos
|
@@ -159,6 +150,7 @@ with gr.Blocks(theme=theme) as demo:
|
|
159 |
generated_audio = gr.Audio(label="Audio generado", interactive=False)
|
160 |
metrics_output = gr.Textbox(label="Métricas", value="Tiempo de generación: -- segundos\nFactor de tiempo real: --")
|
161 |
|
|
|
162 |
# Configuración del botón para generar voz
|
163 |
generate_button.click(
|
164 |
predict,
|
@@ -178,4 +170,4 @@ demo.css = """
|
|
178 |
"""
|
179 |
|
180 |
if __name__ == "__main__":
|
181 |
-
demo.launch(auth=[("Pedro Labattaglia", "PL2024"), ("Invitado", "PLTTS2024")])
|
|
|
1 |
+
import os
|
2 |
import re
|
3 |
import time
|
4 |
import sys
|
|
|
59 |
top_k = config.model_args.get("top_k", 50)
|
60 |
top_p = config.model_args.get("top_p", 0.85)
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
|
63 |
audio_path=reference_audio
|
64 |
)
|
|
|
74 |
length_penalty=length_penalty,
|
75 |
repetition_penalty=repetition_penalty,
|
76 |
top_k=top_k,
|
77 |
+
top_p=top_p
|
|
|
|
|
|
|
|
|
78 |
)
|
79 |
|
80 |
inference_time = time.time() - start_time
|
81 |
|
82 |
output_path = "pedro_labattaglia_TTS.wav"
|
83 |
# Guardar el audio directamente desde el output del modelo
|
84 |
+
import scipy.io.wavfile as wavfile
|
85 |
wavfile.write(output_path, config.audio["output_sample_rate"], out["wav"])
|
86 |
|
87 |
audio_length = len(out["wav"]) / config.audio["output_sample_rate"] # duración del audio en segundos
|
|
|
150 |
generated_audio = gr.Audio(label="Audio generado", interactive=False)
|
151 |
metrics_output = gr.Textbox(label="Métricas", value="Tiempo de generación: -- segundos\nFactor de tiempo real: --")
|
152 |
|
153 |
+
|
154 |
# Configuración del botón para generar voz
|
155 |
generate_button.click(
|
156 |
predict,
|
|
|
170 |
"""
|
171 |
|
172 |
if __name__ == "__main__":
|
173 |
+
demo.launch(auth=[("Pedro Labattaglia", "PL2024"), ("Invitado", "PLTTS2024")])
|