AudioToImage / app.py
Bartusito's picture
Update app.py
123f417
raw
history blame
1.22 kB
import gradio as gr
import numpy as np
import torch
from huggingsound import SpeechRecognitionModel
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import pipeline
# Lazily-created Whisper pipeline, kept at module level so the model is
# loaded once per process instead of on every request.
_whisper_pipeline = None


def _get_whisper():
    """Return the shared Whisper ASR pipeline, creating it on first use."""
    global _whisper_pipeline
    if _whisper_pipeline is None:
        _whisper_pipeline = pipeline(
            'automatic-speech-recognition',
            model='openai/whisper-medium',
            device=-1,  # -1 runs the model on the CPU
        )
    return _whisper_pipeline


def modelo1(audio):
    """Transcribe recorded audio to text with Whisper.

    Args:
        audio: ``(sample_rate, samples)`` pair; ``samples`` is array-like
            with shape ``(n,)`` or, presumably for multi-channel input,
            ``(n, channels)`` (assumed Gradio layout -- confirm against the
            Gradio version in use).

    Returns:
        The transcribed text (the ``"text"`` field of the pipeline result).

    Raises:
        ValueError: If ``audio`` is ``None`` or contains no samples.
    """
    if audio is None:
        raise ValueError("No audio was provided.")

    sample_rate, samples = audio
    samples = np.asarray(samples)
    if samples.size == 0:
        raise ValueError("The audio recording is empty.")

    # Whisper expects a 1-D float waveform: mix channels down to mono and
    # convert integer PCM to float32 before normalizing.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    samples = samples.astype(np.float32)

    # Peak-normalize to [-1, 1]; skip for pure silence to avoid dividing by 0.
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples /= peak

    # Passing the sampling rate lets the pipeline resample to the rate the
    # model was trained on instead of assuming the input already matches it.
    result = _get_whisper()({"sampling_rate": sample_rate, "raw": samples})
    return result["text"]
# Lazily-created Stable Diffusion pipeline, kept at module level so the
# weights are loaded once per process instead of on every request.
_sd_pipeline = None


def _get_sd_pipeline():
    """Return the shared Stable Diffusion pipeline, creating it on first use."""
    global _sd_pipeline
    if _sd_pipeline is None:
        model_id = "stabilityai/stable-diffusion-2-1"
        # The pipeline runs on the CPU, so load the weights in float32:
        # half precision is meant for GPU inference and is not usable here.
        pipe = StableDiffusionPipeline.from_pretrained(
            model_id, torch_dtype=torch.float32
        )
        # Use the DPMSolverMultistepScheduler (DPM-Solver++) scheduler
        # instead of the pipeline's default one.
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            pipe.scheduler.config
        )
        _sd_pipeline = pipe.to("cpu")
    return _sd_pipeline


def modelo2(text):
    """Generate an image from a text prompt with Stable Diffusion 2.1.

    Args:
        text: Prompt passed to the diffusion pipeline.

    Returns:
        The first image of the pipeline output (``.images[0]``).
    """
    print(text)
    return _get_sd_pipeline()(text).images[0]
def execution(audio):
    """Chain both models: feed ``audio`` to ``modelo1`` and its result to ``modelo2``.

    Args:
        audio: Value forwarded unchanged to ``modelo1``.

    Returns:
        Whatever ``modelo2`` returns for the output of ``modelo1``.
    """
    return modelo2(modelo1(audio))
if __name__ == "__main__":
    # Build the audio-in / image-out interface around `execution` and start
    # serving it; this only happens when the file is run as a script.
    demo = gr.Interface(
        fn=execution,
        inputs="audio",
        outputs="image",
    )
    demo.launch()