import collections
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import pretty_midi
import tensorflow as tf
from IPython import display

# Make this file's directory importable so the local model module is found.
directory = Path(__file__).resolve().parent
sys.path.insert(0, str(directory))

from model import VAE

_CAP = 3501
_SAMPLING_RATE = 16000
_INSTRUMENT_NAME = "Acoustic Grand Piano"
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
)

def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """
    Convert a MIDI file to a "song map" (a dataframe where each note is
    broken into its components). The song must have at least ``_CAP``
    (3501) notes.

    Parameters
    ----------
    midi_file : str
        Path to the MIDI file.

    Returns
    -------
    song_map : pd.DataFrame
        3xN matrix where each column is a note, composed of pitch, step and
        duration.
    """

    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[0]
    notes = collections.defaultdict(list)

    # Sort by start time so that "step" (the offset from the previous note's
    # start) is never negative.
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)

    if len(sorted_notes) < _CAP:
        raise ValueError(f"Song must have at least {_CAP} notes.")

    prev_start = sorted_notes[0].start

    for note in sorted_notes:
        start = note.start
        end = note.end
        notes['pitch'].append(note.pitch)
        notes['step'].append(start - prev_start)
        notes['duration'].append(end - start)
        prev_start = start

    notes_df = pd.DataFrame({name: np.array(value) for name, value in notes.items()})
    notes_df = notes_df[:_CAP]
    song_map = (notes_df / _SCALING_FACTORS).T
    return song_map
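
# A minimal usage sketch for midi_to_notes, assuming a local MIDI file named
# "example.mid" with at least _CAP notes (the filename is illustrative, not
# part of this module). Kept as a comment so importing the module has no
# side effects:
#
#   song_map = midi_to_notes("example.mid")
#   song_map.shape   # (3, 3501): rows are pitch, step and duration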


def display_audio(pm: pretty_midi.PrettyMIDI, seconds: int = -1) -> display.Audio:
    """
    Display a song in PrettyMIDI format as a display.Audio object.
    This function is especially useful in a Jupyter notebook.

    Parameters
    ----------
    pm : pretty_midi.PrettyMIDI
        PrettyMIDI object containing a song.
    seconds : int
        Number of seconds of the song to display.
        Default ``-1``, for which the full length is taken.

    Returns
    -------
    display_obj : display.Audio
        Song as an object allowing for display.
    """

    # Synthesise the MIDI to a raw waveform (requires fluidsynth).
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)

    if seconds == -1:
        waveform_short = waveform[:]
    else:
        waveform_short = waveform[:seconds * _SAMPLING_RATE]

    display_obj = display.Audio(waveform_short, rate=_SAMPLING_RATE)

    return display_obj
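
# Notebook sketch for display_audio, assuming fluidsynth is installed and
# "example.mid" exists (both are assumptions, not guaranteed by this module).
# In a Jupyter cell the returned display.Audio renders an inline player:
#
#   pm = pretty_midi.PrettyMIDI("example.mid")
#   display_audio(pm, seconds=30)   # play only the first 30 seconds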


def notes_to_midi(song_map: pd.DataFrame, out_file: str, velocity: int = 50) -> pretty_midi.PrettyMIDI:
    """
    Convert a "song map" to a MIDI file (the reverse process with respect to
    midi_to_notes) and (optionally) save it, generating a PrettyMIDI object
    in the process.

    Parameters
    ----------
    song_map : pd.DataFrame
        3xN matrix where each column is a note, composed of pitch, step and
        duration.
    out_file : str
        Path or file to write the .mid file to. If None, no saving is done.
    velocity : int
        Note loudness, i.e. the hardness a piano key is struck with.
        Default ``50``.

    Returns
    -------
    pm : pretty_midi.PrettyMIDI
        PrettyMIDI object containing the song's representation.
    """

    # Undo the normalisation applied in midi_to_notes and transpose back to
    # one note per row.
    contracted_map = tf.squeeze(song_map)
    song_map_T = contracted_map.numpy().T
    notes = pd.DataFrame(song_map_T, columns=["pitch", "step", "duration"]).mul(_SCALING_FACTORS, axis=1)
    notes["pitch"] = notes["pitch"].astype('int32').clip(1, 127)

    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(_INSTRUMENT_NAME))

    prev_start = 0
    for i, note in notes.iterrows():
        # Skip malformed notes with negative step or duration.
        if note['step'] < 0 or note['duration'] < 0:
            continue

        start = float(prev_start + note['step'])
        end = float(start + note['duration'])
        midi_note = pretty_midi.Note(
            velocity=velocity,
            pitch=int(note['pitch']),
            start=start,
            end=end,
        )
        instrument.notes.append(midi_note)
        prev_start = start

    pm.instruments.append(instrument)

    if out_file:
        pm.write(out_file)
    return pm
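
# Round-trip sketch combining midi_to_notes and notes_to_midi (file names are
# illustrative, not part of this module):
#
#   song_map = midi_to_notes("example.mid")
#   pm = notes_to_midi(song_map, "reconstructed.mid", velocity=50)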


def generate_and_display(model: VAE,
                         out_file: str = None,
                         z_sample: tf.Tensor = None,
                         velocity: int = 50,
                         seconds: int = -1) -> display.Audio:
    """
    Generate a song, (optionally) save it and display it.

    Parameters
    ----------
    model : VAE
        Instance of VAE to generate the song with.
    out_file : str
        Path or file to write the .mid file to.
        Default ``None``, for which no saving is done.
    z_sample : tf.Tensor
        Song encoding used to generate a song.
        Default ``None``, for which an unconditioned piece is generated.
    velocity : int
        Note loudness, i.e. the hardness a piano key is struck with.
        Default ``50``.
    seconds : int
        Number of seconds of the song to display.
        Default ``-1``, for which the full length is taken.

    Returns
    -------
    display_obj : display.Audio
        Song as an object allowing for display.
    """

    # Decode a song map from the latent sample, convert it to MIDI and
    # render it as playable audio.
    song_map = model.decode(z_sample)
    pm = notes_to_midi(song_map, out_file, velocity)
    display_obj = display_audio(pm, seconds)

    return display_obj
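
# End-to-end sketch, assuming a trained VAE instance is already available
# (how the model is built or loaded is defined in model.py and not shown
# here); with z_sample left as None an unconditioned piece is generated:
#
#   display_obj = generate_and_display(model, out_file="generated.mid", seconds=30)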