# Hugging Face Space (page status when this source was captured: Sleeping)
import gradio as gr | |
from diffusers import AudioLDMControlNetPipeline, ControlNetModel | |
from pretty_midi import PrettyMIDI | |
import torch | |
# Pick the execution device and matching precision in one step: half
# precision on CUDA when it is available, full precision on the CPU otherwise.
device, torch_dtype = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Load the MIDI ControlNet weights, then build the AudioLDM pipeline around
# them with the same dtype and move the whole pipeline to the chosen device.
controlnet = ControlNetModel.from_pretrained(
    "lauraibnz/midi-audioldm",
    torch_dtype=torch_dtype,
)
pipe = AudioLDMControlNetPipeline.from_pretrained(
    "cvssp/audioldm-m-full",
    controlnet=controlnet,
    torch_dtype=torch_dtype,
).to(device)
def predict(
    midi_file=None,
    prompt="",
    negative_prompt="",
    audio_length_in_s=5,
    controlnet_conditioning_scale=1,
    num_inference_steps=20,
    guess_mode=False,
):
    """Generate audio from a MIDI file and a text prompt.

    Args:
        midi_file: The uploaded MIDI file, either as a path string or as an
            object exposing the path through a ``name`` attribute.
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        audio_length_in_s: Requested clip length, forwarded unchanged.
        controlnet_conditioning_scale: Conditioning strength; cast to
            ``float`` before being forwarded.
        num_inference_steps: Number of inference steps; cast to ``int``
            before being forwarded, so a fractional UI value is truncated.
        guess_mode: Forwarded unchanged to the pipeline.

    Returns:
        A ``(16000, audio.audios.T)`` tuple: a fixed sample rate and the
        transposed pipeline output, as consumed by the ``"audio"`` output.

    Raises:
        ValueError: If no MIDI file was supplied.
    """
    # Fail with a readable message instead of an AttributeError on None.
    if midi_file is None:
        raise ValueError(
            "Please upload a MIDI file (.mid) to condition the generation on."
        )

    # Accept both a plain path string and a file wrapper carrying ``.name``.
    midi_path = midi_file if isinstance(midi_file, str) else midi_file.name
    midi = PrettyMIDI(midi_path)

    audio = pipe(
        prompt,
        negative_prompt=negative_prompt,
        midi=midi,
        audio_length_in_s=audio_length_in_s,
        # A slider can deliver a float (e.g. 20.0); a step count is an int.
        num_inference_steps=int(num_inference_steps),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        guess_mode=guess_mode,
    )
    # NOTE(review): 16000 is presumably the vocoder's sampling rate, and the
    # transpose presumably turns (batch, samples) into (samples, channels)
    # for Gradio -- confirm against the pipeline's output shape.
    return (16000, audio.audios.T)
# Gradio front end for ``predict``. The input components are listed in the
# same order as ``predict``'s positional parameters: MIDI file, prompt,
# negative prompt, duration, conditioning scale, inference steps, guess mode.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.File(file_types=[".mid"]),
        "text",
        gr.Textbox(label="negative prompt"),
        # Start at one 5-second step: a 0-second clip is not a usable request.
        gr.Slider(5, 30, value=10, step=5, label="duration (seconds)"),
        gr.Slider(0.0, 1.0, value=1.0, step=0.1, label="conditioning scale"),
        # Whole-number steps starting at 1: a step count cannot be fractional
        # or zero.
        gr.Slider(1, 50, value=20, step=1, label="inference steps"),
        gr.Checkbox(label="guess mode"),
    ],
    outputs="audio",
    # One example row, with values in the same order as ``inputs``.
    examples=[["S01.mid", "piano", "", 10, 1.0, 20, False]],
    cache_examples=True,
)
demo.launch()