Spaces:
Sleeping
Sleeping
File size: 1,540 Bytes
0291473 431cf64 f0a6291 0a716a3 431cf64 4518a48 431cf64 ba020f3 5d8dc18 431cf64 4518a48 f0a6291 431cf64 f0a6291 709af2a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import gradio as gr
from diffusers import AudioLDMControlNetPipeline, ControlNetModel
from pretty_midi import PrettyMIDI
import torch
# Pick the compute device and matching precision in one step: float16 on
# CUDA, float32 on CPU.
device, torch_dtype = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Load the MIDI ControlNet weights, attach them to the AudioLDM pipeline,
# and move the assembled pipeline onto the selected device.
controlnet = ControlNetModel.from_pretrained(
    "lauraibnz/midi-audioldm",
    torch_dtype=torch_dtype,
)
pipe = AudioLDMControlNetPipeline.from_pretrained(
    "cvssp/audioldm-m-full",
    controlnet=controlnet,
    torch_dtype=torch_dtype,
).to(device)
def predict(midi_file=None, prompt="", negative_prompt="", audio_length_in_s=5, controlnet_conditioning_scale=1, num_inference_steps=20, guess_mode=False):
    """Generate audio from a MIDI file and a text prompt.

    Args:
        midi_file: Uploaded MIDI file. Either an object exposing the file
            path as ``.name`` or a plain path string.
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        audio_length_in_s: Requested audio length in seconds.
        controlnet_conditioning_scale: ControlNet conditioning scale;
            converted to ``float`` before use.
        num_inference_steps: Number of inference steps; converted to
            ``int`` before use because UI sliders may deliver floats.
        guess_mode: Forwarded unchanged to the pipeline.

    Returns:
        A ``(16000, audio.audios.T)`` tuple: the hard-coded sample rate and
        the transposed audio array returned by the pipeline.

    Raises:
        gr.Error: If no MIDI file was provided.
    """
    if midi_file is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload a MIDI file.")

    # Accept both file wrappers (path in `.name`) and plain path strings.
    midi_path = getattr(midi_file, "name", midi_file)
    midi = PrettyMIDI(midi_path)

    audio = pipe(
        prompt,
        negative_prompt=negative_prompt,
        midi=midi,
        audio_length_in_s=audio_length_in_s,
        # The step count must be a whole number; sliders can emit floats.
        num_inference_steps=int(num_inference_steps),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        guess_mode=guess_mode,
    )
    return (16000, audio.audios.T)
# Build the web UI around `predict`. The input components are listed in the
# same order as the parameters of `predict`.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.File(file_types=[".mid"]),
        "text",
        gr.Textbox(label="negative prompt"),
        # Minimum raised from 0: a zero-second clip is not a useful request.
        gr.Slider(5, 30, value=10, step=5, label="duration (seconds)"),
        gr.Slider(0.0, 1.0, value=1.0, step=0.1, label="conditioning scale"),
        # Whole-number steps only, and at least one step.
        gr.Slider(1, 50, value=20, step=1, label="inference steps"),
        gr.Checkbox(label="guess mode"),
    ],
    outputs="audio",
    # With several inputs, examples must be a nested list: one inner list
    # per example row. This row fills the MIDI file and the prompt.
    examples=[["test.mid", "piano"]],
    cache_examples=True,
)
demo.launch()