Bartusito committed
Commit e06a3df
1 Parent(s): 7e4bb2d

Update app.py

Files changed (1)
app.py +4 -14
app.py CHANGED
@@ -3,23 +3,13 @@ import numpy as np
 from huggingsound import SpeechRecognitionModel
 from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
 from transformers import pipeline
-import librosa
 
 # Function to convert the input audio's sample rate
 def modelo1(audio):
-    audio_data, sample_rate = audio
-    # Make sure audio_data is a NumPy array
-    if not isinstance(audio_data, np.ndarray):
-        audio_data = np.array(audio_data)
 
-    # Convert stereo audio to mono
-    if audio_data.shape[0] == 2:
-        audio_data = np.mean(audio_data, axis=0)
-
-    # Use audio_data as input for the model
-    whisper = pipeline('automatic-speech-recognition', model='openai/whisper-medium', device=-1)  # Change 'device' to -1 to use the CPU
-    text = whisper(audio_data, sample_rate)
-    return text
+    whisper = pipeline('automatic-speech-recognition', model='openai/whisper-medium', device=0)  # Change 'device' to -1 to use the CPU
+    text = whisper('audio.mp3')
+    return text["text"]
 
 def modelo2(text):
     model_id = "stabilityai/stable-diffusion-2-1"
@@ -38,5 +28,5 @@ def execution(audio):
     return modelo2res
 
 if __name__ == "__main__":
-    demo = gr.Interface(fn=modelo1, inputs="audio", outputs="text")
+    demo = gr.Interface(fn=execution, inputs="audio", outputs="image")
     demo.launch()
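
For context, a minimal sketch of what the full app.py plausibly looks like after this commit. The diff elides the body of execution() and most of modelo2(), so the speech-to-text-to-image wiring, the Stable Diffusion setup past model_id, and the torch/gradio imports below are assumptions inferred from the visible imports and return statements, not the author's exact code. Note also that modelo1 transcribes a hardcoded 'audio.mp3' file rather than the audio argument gradio passes in; that behavior comes from the committed code itself.

import gradio as gr
import numpy as np                                   # in the file; unused in this sketch
import torch                                         # assumed: needed for torch.float16
from huggingsound import SpeechRecognitionModel      # imported by the app; unused here
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import pipeline

def modelo1(audio):
    # Transcribe speech with Whisper; device=0 selects the first GPU
    # (change 'device' to -1 to run on the CPU).
    whisper = pipeline('automatic-speech-recognition',
                       model='openai/whisper-medium', device=0)
    text = whisper('audio.mp3')  # reads a fixed file, not the gradio input
    return text["text"]

def modelo2(text):
    # Generate an image from the transcript with Stable Diffusion 2.1;
    # everything past model_id is assumed from the imports.
    model_id = "stabilityai/stable-diffusion-2-1"
    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe = pipe.to("cuda")
    return pipe(text).images[0]

def execution(audio):
    # Assumed chaining: audio -> transcript -> image (the diff shows only the return).
    modelo1res = modelo1(audio)
    modelo2res = modelo2(modelo1res)
    return modelo2res

if __name__ == "__main__":
    demo = gr.Interface(fn=execution, inputs="audio", outputs="image")
    demo.launch()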