Spaces:
Sleeping
Sleeping
File size: 6,675 Bytes
1a572e4 5332e66 db4880c 71a2b8b db4880c 1a572e4 71a2b8b 5332e66 1a572e4 5332e66 1a572e4 71a2b8b 5332e66 db4880c 1a572e4 db4880c 1a572e4 71a2b8b db4880c 71a2b8b 1a572e4 db4880c 71a2b8b 1a572e4 71a2b8b 1a572e4 71a2b8b 1a572e4 db4880c 1a572e4 71a2b8b 1a572e4 71a2b8b 1a572e4 71a2b8b db4880c 71a2b8b 1a572e4 71a2b8b 7a3b53b 5332e66 7a3b53b 1a572e4 7a3b53b 71a2b8b db4880c 71a2b8b db4880c 1a572e4 71a2b8b 5332e66 71a2b8b 1a572e4 71a2b8b 1a572e4 db4880c 71a2b8b db4880c 1a572e4 db4880c e94f209 1a572e4 71a2b8b db4880c e94f209 db4880c 1a572e4 7a3b53b db4880c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
import binascii
import os
import gradio as gr
import librosa
import numpy as np
import pretty_midi
import torch
import yt_dlp
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
from utils import cli_to_api, mp3_write, normalize
# Working directories: one for audio pulled from YouTube, one for the
# generated MIDI / MP3 artifacts. Both are created at import time.
yt_video_dir = "./yt_dir"
outputs_dir = "./midi_wav_outputs"
os.makedirs(outputs_dir, exist_ok=True)
os.makedirs(yt_video_dir, exist_ok=True)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The model and its processor are loaded from the same checkpoint id.
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device)
processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")

# Names offered in the "Arranger" dropdown. Materialised as a real list
# (instead of a live dict-keys view) because the dropdown's `choices`
# parameter is documented as a list.
composers = list(model.generation_config.composer_to_feature_token)
def get_audio_from_yt_video(yt_link: str):
    """Download the audio of ``yt_link`` as an MP3 file inside ``yt_video_dir``.

    Args:
        yt_link: The link typed into the YouTube textbox.

    Returns:
        The path of the downloaded file, twice: the click handler wires the
        two values to the preview player and to the upload component.

    Raises:
        gr.Error: If ``yt_link`` is empty or contains only whitespace.
    """
    # The textbox is multi-line, so pasted links may carry stray whitespace
    # or newlines; an empty value would otherwise only fail deep in yt-dlp.
    yt_link = (yt_link or "").strip()
    if not yt_link:
        raise gr.Error("Please enter a YouTube link before downloading.")

    # Random file name so that two downloads never collide.
    filename = binascii.hexlify(os.urandom(8)).decode() + ".mp3"
    filename = os.path.join(yt_video_dir, filename)

    yt_opt = cli_to_api(
        [
            "--extract-audio",
            "--audio-format",
            "mp3",
            "--restrict-filenames",
            # The output template below is one fixed file name, so only the
            # single video should be fetched when the URL also names a playlist.
            "--no-playlist",
            "-o",
            filename,
        ]
    )

    with yt_dlp.YoutubeDL(yt_opt) as ydl:
        ydl.download([yt_link])

    return filename, filename
def inference(file_uploaded, composer):
    """Generate a piano cover for an audio file and write the result files.

    Args:
        file_uploaded: Path of the audio file to convert (the upload component
            is configured with ``type="filepath"``).
        composer: Arranger name forwarded to ``model.generate``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI.

    Raises:
        gr.Error: If no audio file has been provided.
    """
    # Clicking "Generate" without any audio passes None here; fail with a
    # readable message instead of an opaque loader error.
    if not file_uploaded:
        raise gr.Error("Please upload an audio file or download one from YouTube first.")

    # sr=None keeps the file's native sampling rate. This can silently lower
    # the quality of the generated output; if that happens, consider loading
    # with sr=44100 instead.
    pop_y, sr = librosa.load(file_uploaded, sr=None)

    inputs = processor(audio=pop_y, sampling_rate=sr, return_tensors="pt").to(device)
    model_output = model.generate(input_features=inputs["input_features"], composer=composer)

    # Decoding is done on CPU copies of the generated tokens and features.
    tokenizer_output = processor.batch_decode(
        token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu")
    )["pretty_midi_objects"]

    return prepare_output_file(tokenizer_output, sr, pop_y)
def prepare_output_file(tokenizer_output: "list[pretty_midi.PrettyMIDI]", sr: int, pop_y: np.ndarray):
    """Write the generated MIDI, its synthesized MP3, and a mix with the input audio.

    Args:
        tokenizer_output: Sequence of decoded MIDI objects; only the first
            element is used.
        sr: Sampling rate used for synthesis and for the written MP3 files.
        pop_y: Samples of the original audio (assumed to be a mono, 1-D
            array -- TODO confirm against the caller).

    Returns:
        A 5-tuple ``(midi_mp3, midi_mp3, midi_file, mix_mp3, mix_mp3)`` of
        file paths; the MP3 paths are repeated because each feeds both a
        player and a download component.
    """
    # Random suffix so that no two runs write to the same files.
    output_file_name = "p2p_" + binascii.hexlify(os.urandom(8)).decode()

    # All three artifacts share one stem. Building each path from the stem
    # avoids str.replace(".mid", ...), which would also rewrite any ".mid"
    # that happened to appear earlier in the path.
    output_stem = os.path.join(outputs_dir, output_file_name)
    midi_output = output_stem + ".mid"
    midi_y_path: str = output_stem + ".mp3"
    stereo_path = output_stem + ".mix.mp3"

    # Write the .mid file and its synthesized audio.
    generated_midi = tokenizer_output[0]
    generated_midi.write(midi_output)
    midi_y: np.ndarray = generated_midi.fluidsynth(sr)
    mp3_write(midi_y_path, sr, normalize(midi_y), normalized=True)

    # Zero-pad the shorter signal at the end so both have the same length.
    target_len = max(len(pop_y), len(midi_y))
    midi_y = np.pad(midi_y, (0, target_len - len(midi_y)))
    pop_y = np.pad(pop_y, (0, target_len - len(pop_y)))

    # One channel is the piano, the other the original audio at half gain.
    stereo = np.stack((midi_y, pop_y * 0.5))

    # Transposed so that the two channels become columns before writing.
    mp3_write(stereo_path, sr, normalize(stereo.T), normalized=True)

    return midi_y_path, midi_y_path, midi_output, stereo_path, stereo_path
# Gradio UI: pick an input (upload or YouTube download), choose an arranger,
# generate, then listen to / download the results.
# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation had been flattened -- confirm the layout renders as intended.
block = gr.Blocks()

with block:
    # Page header.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 400px; margin: 0 auto;">
          <div
            style="
              display: inline-flex;
              align-items: center;
              gap: 0.8rem;
              font-size: 1.75rem;
            "
          >
            <h1 style="font-weight: 900; margin-bottom: 7px;">
              Pop2piano
            </h1>
          </div>
          <p style="margin-bottom: 10px; font-size: 94%">
            A demo for Pop2Piano:Pop Audio-based Piano Cover Generation.<br>
            Please select the composer(Arranger) and upload the pop audio or enter the YouTube link and then click Generate.
          </p>
        </div>
        """
    )

    # Input section: audio source tabs plus arranger selection.
    with gr.Group():
        with gr.Column():
            with gr.Blocks() as audio_select:
                with gr.Tab("Upload Audio"):
                    file_uploaded = gr.Audio(label="Upload an audio", type="filepath")
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path = gr.Audio(
                        label="Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The downloaded file is shown in the preview player and also
                    # placed into the upload component, which is what feeds inference.
                    yt_btn.click(
                        get_audio_from_yt_video,
                        inputs=[yt_link],
                        outputs=[yt_audio_path, file_uploaded],
                    )
        with gr.Column():
            composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
            generate_btn = gr.Button("Generate")

    # Output section: players on the first row, downloads on the second.
    with gr.Group():
        gr.HTML(
            """
            <div> <h3> <center> Listen to the generated MIDI. </h3> </div>
            """
        )
        with gr.Row(equal_height=True):
            stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
            wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
        with gr.Row():
            # Label fixed: the closing parenthesis was missing.
            stereo_mix2 = gr.File(label="Download the Stereo Mix (.mp3)")
            wav_output2 = gr.File(label="Download the Generated MIDI (.mp3)")
            midi_output = gr.File(label="Download the Generated MIDI (.mid)")
        # The output order must match the tuple returned by inference.
        generate_btn.click(
            inference,
            inputs=[file_uploaded, composer],
            outputs=[wav_output1, wav_output2, midi_output, stereo_mix1, stereo_mix2],
        )

    # Bundled example, wired to the same function and components as the button.
    with gr.Group():
        gr.Examples(
            [
                ["./examples/custom_song.mp3", "composer1"],
            ],
            fn=inference,
            inputs=[file_uploaded, composer],
            outputs=[wav_output1, wav_output2, midi_output, stereo_mix1, stereo_mix2],
            cache_examples=True,
        )

    # Footer links.
    gr.HTML(
        """
        <div class="footer">
          <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
          <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
          <center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
          </p>
        </div>
        """
    )

block.launch(debug=False)
|