# Script: run a MIDI-conditioned AudioLDM ControlNet pipeline once on
# "test.mid" with the text prompt "techno".
import gradio as gr
from diffusers import AudioLDMControlNetPipeline, ControlNetModel
from pretty_midi import PrettyMIDI
import torch

# Run on the GPU in half precision when CUDA is available; otherwise fall
# back to the CPU in full precision.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# MIDI file passed to the pipeline as the `midi` argument below.
midi = PrettyMIDI("test.mid")

# Load the ControlNet weights first, then hand them to the pipeline; both
# are loaded with the same dtype chosen above.
controlnet = ControlNetModel.from_pretrained(
    "lauraibnz/midi-audioldm",
    torch_dtype=torch_dtype,
)
pipe = AudioLDMControlNetPipeline.from_pretrained(
    "cvssp/audioldm-m-full",
    controlnet=controlnet,
    torch_dtype=torch_dtype,
)
pipe = pipe.to(device)

# Single generation call.
# NOTE(review): `audio` holds whatever the pipeline call returns; presumably
# a pipeline output object rather than a raw waveform -- confirm before use.
audio = pipe(
    "techno",
    audio_length_in_s=10,
    num_inference_steps=20,
    midi=midi,
    controlnet_conditioning_scale=1.0,
)