AudioToImage / app.py
Bartusito's picture
Update app.py
e06a3df
raw
history blame
No virus
1.11 kB
import functools

import gradio as gr
import numpy as np
import torch
from huggingsound import SpeechRecognitionModel
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import pipeline
@functools.lru_cache(maxsize=1)
def _load_whisper():
    """Build the Whisper speech-recognition pipeline once and reuse it."""
    # device=0 selects the first CUDA GPU; change it to -1 to run on the CPU.
    return pipeline(
        'automatic-speech-recognition',
        model='openai/whisper-medium',
        device=0,
    )


def _to_whisper_input(audio):
    """Convert a Gradio audio value into an input the ASR pipeline accepts.

    A string is treated as a file path and passed through unchanged. A
    ``(sample_rate, samples)`` tuple is converted to a mono float32 waveform
    wrapped in the ``{"sampling_rate", "raw"}`` dict form of the pipeline.
    """
    if audio is None:
        raise ValueError("No audio was provided.")
    if isinstance(audio, str):
        return audio

    sample_rate, samples = audio
    samples = np.asarray(samples)
    # Scale integer PCM to [-1, 1] *before* any averaging, because the mean
    # of an integer array is already floating point and would skip scaling.
    if np.issubdtype(samples.dtype, np.integer):
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)
    # Assumes multi-channel data is laid out as (samples, channels), which is
    # how the Gradio Audio component documents its numpy output.
    if samples.ndim == 2:
        samples = samples.mean(axis=1)
    return {"sampling_rate": int(sample_rate), "raw": samples}


def modelo1(audio):
    """Transcribe the given audio to text with Whisper.

    Args:
        audio: Either a path to an audio file or a ``(sample_rate, samples)``
            tuple as produced by a Gradio audio input.

    Returns:
        The transcription string (the ``"text"`` field of the pipeline result).

    Raises:
        ValueError: If ``audio`` is ``None``.
    """
    whisper = _load_whisper()
    result = whisper(_to_whisper_input(audio))
    return result["text"]
@functools.lru_cache(maxsize=1)
def _load_diffusion_pipeline():
    """Load the Stable Diffusion 2.1 pipeline once and keep it on the GPU."""
    model_id = "stabilityai/stable-diffusion-2-1"
    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
    # Use the DPMSolverMultistepScheduler (DPM-Solver++) instead of the
    # scheduler that ships with the checkpoint.
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    return pipe.to("cuda")


def modelo2(text):
    """Generate an image from a text prompt with Stable Diffusion 2.1.

    Args:
        text: The prompt passed to the diffusion pipeline.

    Returns:
        The first image of the pipeline output (``.images[0]``).
    """
    pipe = _load_diffusion_pipeline()
    image = pipe(text).images[0]
    return image
def execution(audio):
    """Run the full audio-to-image chain: transcribe, then render the text.

    Args:
        audio: The audio value forwarded unchanged to ``modelo1``.

    Returns:
        Whatever ``modelo2`` returns for the transcription from ``modelo1``.
    """
    return modelo2(modelo1(audio))
if __name__ == "__main__":
    # Expose `execution` through a Gradio UI with one audio input and one
    # image output, then start serving it.
    demo = gr.Interface(
        fn=execution,
        inputs="audio",
        outputs="image",
    )
    demo.launch()