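# Gradio Space: transcribe music from a YouTube video to MIDI with the MT3
# Transformer model, then return the MIDI file, a synthesized preview, a piano
# roll, and an engraved score.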
import os
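# Install the local package at startup so the imports below resolve inside the Space.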
os.system("python3 -m pip install -e .")
import gradio as gr
import note_seq
from pytube import YouTube
from pydub import AudioSegment
from music21 import converter, environment
from inferencemodel import InferenceModel
from utils import upload_audio, create_image_from_note_sequence
import nest_asyncio
nest_asyncio.apply()
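# MT3 operates on 16 kHz audio; the SoundFont below is used by FluidSynth when
# rendering the transcribed MIDI back to audio.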
SAMPLE_RATE = 16000
SF2_PATH = "SGM-v2.01-Sal-Guit-Bass-V1.3.sf2"
# Set up music21 with musescore
us = environment.UserSettings()
us["musescoreDirectPNGPath"] = "/usr/bin/mscore3"
os.putenv("QT_QPA_PLATFORM", "offscreen")
os.putenv("XDG_RUNTIME_DIR", environment.Environment().getRootTempDir())
def load_model(model: str):
    checkpoint_path = f"/home/user/app/checkpoints/{model}/"
    # Start the inference model from the selected checkpoint.
    inference_model = InferenceModel(checkpoint_path, model)
    return inference_model
# Credits https://huggingface.co/spaces/rajesh1729/youtube-video-transcription-with-whisper
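# Download the audio-only stream of the YouTube video and return its local path.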
def get_audio(url):
    yt = YouTube(url)
    video = yt.streams.filter(only_audio=True).first()
    out_file = video.download(output_path=".")
    base, ext = os.path.splitext(out_file)
    new_file = base + ".wav"
    # Note: this only renames the downloaded container; the audio itself is not
    # re-encoded here.
    os.rename(out_file, new_file)
    return new_file
# Credits https://huggingface.co/spaces/jeffistyping/Youtube-Whisperer
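# Gather thumbnail, title, and audio so the UI can show a preview before transcribing.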
def populate_metadata(link):
    yt = YouTube(link)
    audio = get_audio(link)
    return yt.thumbnail_url, yt.title, audio, audio
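# Full pipeline for the "Transcribe music" button: load the downloaded audio,
# run the model to get a NoteSequence, write a MIDI file, synthesize a preview,
# and render a piano roll plus a MuseScore image of the score.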
def inference(yt_audio_path, model):
    with open(yt_audio_path, "rb") as fd:
        contents = fd.read()
    audio = upload_audio(contents, sample_rate=SAMPLE_RATE)
    inference_model = load_model(model)
    # Transcribe the audio to a NoteSequence and write it out as a MIDI file.
    est_ns = inference_model(audio)
    note_seq.sequence_proto_to_midi_file(est_ns, "./transcribed.mid")
    # Synthesize the transcription with FluidSynth so it can be previewed in the UI.
    synth = note_seq.midi_synth.fluidsynth
    array_of_floats = synth(est_ns, sample_rate=SAMPLE_RATE, sf2_path=SF2_PATH)
    int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)
    # Render a piano-roll image and a MuseScore engraving of the transcription.
    piano_roll = create_image_from_note_sequence(est_ns)
    parsed = converter.parse("./transcribed.mid")
    score = parsed.write("musicxml.png")
    return "./transcribed.mid", (SAMPLE_RATE, int16_data), piano_roll, score
title = "Transcribe music from YouTube videos using Transformers."
description = """
Gradio demo for Music Transcription with Transformers. Read more in the links below.
To use this demo, just add a YouTube link with the music you want to transcribe.
"""
article = "<p style='text-align: center'><a href='https://magenta.tensorflow.org/transcription-with-transformers' target='_blank'>Blog: Music Transcription with Transformers</a> | <a href='https://github.com/magenta/mt3' target='_blank'>Github Repo</a></p>"
# Create a block object
demo = gr.Blocks()
# Use your Block object as a context
with demo:
gr.Markdown("<h1 style='text-align: center'>"
+ title
+ "</h1>")
gr.Markdown(description)
    with gr.Box():
        with gr.Box():
            model_label = """
            What kind of model do you want to use?
            The ismir2021 model transcribes piano only, with note velocities.
            The mt3 model transcribes multiple simultaneous instruments, but without velocities.
            """
            # Only the mt3 checkpoint is currently exposed in the UI.
            model = gr.Radio(
                ["mt3"],
                label=model_label,
                value="mt3"
            )
        with gr.Row():
            link = gr.Textbox(label="YouTube Link")
        with gr.Row():
            preview_btn = gr.Button("Preview")
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            with gr.Row():
                yt_audio = gr.Audio()
                yt_audio_path = gr.Textbox(visible=False)
            preview_btn.click(fn=populate_metadata,
                              inputs=[link],
                              outputs=[img, title, yt_audio, yt_audio_path])
        with gr.Row():
            btn = gr.Button("Transcribe music")
        with gr.Row():
            midi_file = gr.File()
            midi_audio = gr.Audio()
        with gr.Row():
            piano_roll = gr.Image()
            score = gr.Image()
        btn.click(inference,
                  inputs=[yt_audio_path, model],
                  outputs=[midi_file, midi_audio, piano_roll, score],
                  api_name="transcribe_wav_to_midi")
    gr.Markdown('''
    [![Twitter Follow](https://img.shields.io/twitter/follow/juancopi81?style=social)](https://twitter.com/juancopi81)
    ![visitors](https://visitor-badge.glitch.me/badge?page_id=Juancopi81.YoutubeMusicTranscribe)
    ''')
    gr.Markdown(article)
demo.launch()
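# A minimal sketch of calling the endpoint exposed via api_name above from a
# separate client process. It assumes the gradio_client package and that the
# Space is reachable under the name used in the visitor badge; the argument
# order mirrors the inputs of btn.click.
#
#     from gradio_client import Client
#     client = Client("Juancopi81/YoutubeMusicTranscribe")
#     midi_path, audio, piano_roll, score = client.predict(
#         "path/to/audio.wav", "mt3", api_name="/transcribe_wav_to_midi"
#     )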