|
|
|
import tensorflow as tf |
|
import numpy as np |
|
import pandas as pd |
|
|
|
|
|
import pretty_midi |
|
|
|
|
|
from IPython import display |
|
|
|
|
|
|
|
import sys |
|
from pathlib import Path |
|
|
|
directory = Path(__file__).resolve().parent |
|
sys.path.insert(0, str(directory)) |
|
|
|
from model import VAE |
|
|
|
|
|
import collections |
|
|
|
|
|
# Maximum number of notes midi_to_notes keeps per song.
_CAP = 3501

# Sample rate handed to fluidsynth and to display.Audio.
_SAMPLING_RATE = 16000

# Instrument name resolved to a MIDI program when a song is rebuilt.
_INSTRUMENT_NAME = "Acoustic Grand Piano"

# Per-column factors: midi_to_notes divides by them, map_to_wav multiplies
# them back. NOTE(review): presumably dataset statistics - origin of the
# values is not visible in this file, confirm before changing them.
_SCALING_FACTORS = pd.Series(
    [64.024558, 0.101410, 0.199386],
    index=["pitch", "step", "duration"],
)
|
|
|
def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """
    Convert midi file to "song map" (dataframe where each note is broken
    into its components).

    Only the first instrument of the file is read. Its notes are ordered by
    start time, capped at the first _CAP notes, and every column is divided
    by the matching entry of _SCALING_FACTORS.

    Parameters:
        midi_file (str): Path to the midi file.

    Returns:
        pd.DataFrame: Nx3 table with one row per note and the columns
                      "pitch", "step" (start minus the previous note's
                      start, 0 for the first note) and "duration"
                      (end minus start), all scaled. The table has zero
                      rows when the file holds no instrument or no notes.
    """

    pm = pretty_midi.PrettyMIDI(midi_file)

    # Guard against files without any playable content instead of failing
    # with an IndexError on the first instrument / first note lookup.
    if not pm.instruments or not pm.instruments[0].notes:
        empty = pd.DataFrame(
            {name: np.array([], dtype=float)
             for name in ("pitch", "step", "duration")}
        )
        return empty / _SCALING_FACTORS

    # A note's step only depends on the notes before it, so the cap can be
    # applied right after sorting without changing the result.
    sorted_notes = sorted(pm.instruments[0].notes, key=lambda note: note.start)
    sorted_notes = sorted_notes[:_CAP]

    starts = np.array([note.start for note in sorted_notes], dtype=float)
    ends = np.array([note.end for note in sorted_notes], dtype=float)

    notes_df = pd.DataFrame({
        "pitch": np.array([note.pitch for note in sorted_notes]),
        # Prepending the first start makes the first step exactly 0.
        "step": np.diff(starts, prepend=starts[0]),
        "duration": ends - starts,
    })
    return notes_df / _SCALING_FACTORS
|
|
|
|
|
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=-1) -> display.Audio:
    """
    Display a song in PrettyMIDI format as a display.Audio object.
    This method specially comes in useful in a jupyter notebook.

    The song is synthesized with fluidsynth at _SAMPLING_RATE and then
    optionally cut to its first `seconds` seconds.

    Parameters:
        pm (pretty_midi.PrettyMIDI): PrettyMIDI object containing a song.
        seconds (int | float): Length, in seconds, of the leading part of
                               the song to keep. When set to -1, the full
                               length is taken.

    Returns:
        display.Audio: Song as an object allowing for display.
    """

    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)

    if seconds != -1:
        # Slice bounds have to be integers; casting lets fractional
        # durations (e.g. 1.5) work instead of raising a TypeError.
        waveform = waveform[:int(seconds * _SAMPLING_RATE)]

    return display.Audio(waveform, rate=_SAMPLING_RATE)
|
|
|
|
|
def map_to_wav(song_map: pd.DataFrame, out_file: str, velocity: int=50) -> pretty_midi.PrettyMIDI:
    """
    Convert "song map" to midi file (reverse process with respect to
    midi_to_notes) and (optionally) save it, generating a PrettyMIDI object
    in the process.

    Parameters:
        song_map: Value accepted by tf.squeeze that, once singleton
                  dimensions are dropped, is a 3xN matrix where each column
                  is a note and the rows are pitch, step and duration (in
                  that order), scaled as produced by midi_to_notes.
        out_file (str): Path or file to write .mid file to. If None (or any
                        other falsy value), no saving is done.
        velocity (int): Note loudness, i. e. the hardness a piano key is
                        struck with.

    Returns:
        pretty_midi.PrettyMIDI: PrettyMIDI object containing the song's
                                representation.
    """

    # Drop singleton dimensions, then transpose so that every row is a note.
    note_rows = tf.squeeze(song_map).numpy().T
    notes = pd.DataFrame(
        note_rows, columns=["pitch", "step", "duration"]
    ).mul(_SCALING_FACTORS, axis=1)

    # Round instead of truncating so a pitch that comes back from the
    # scaling round trip as e.g. 59.999 is restored to 60, and clip while
    # the values are still floating point so that out-of-range values
    # cannot overflow the int32 cast before they are limited.
    pitches = notes["pitch"].round().clip(1, 127).astype("int32")

    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(_INSTRUMENT_NAME))

    # "step" is relative to the previous note's start, so start times are
    # rebuilt by accumulating the steps.
    prev_start = 0.0
    for pitch, step, duration in zip(pitches, notes["step"], notes["duration"]):
        start = prev_start + float(step)
        end = start + float(duration)
        instrument.notes.append(
            pretty_midi.Note(
                velocity=velocity,
                pitch=int(pitch),
                start=start,
                end=end,
            )
        )
        prev_start = start

    pm.instruments.append(instrument)
    if out_file:
        pm.write(out_file)
    return pm
|
|
|
def generate_and_display(model: VAE,
                         out_file: str=None,
                         z_sample: tf.Tensor=None,
                         velocity: int=50,
                         seconds: int=120) -> display.Audio:
    """
    Generate a song with the model, (optionally) save it and return it
    in a displayable form.

    Parameters:
        model (VAE): Instance of VAE whose generate method produces the song.
        out_file (str): Path or file to write .mid file to. If None, no
                        saving is done.
        z_sample (tf.Tensor): Song encoding passed to model.generate. If
                              None, an unconditioned piece is generated.
        velocity (int): Note loudness, i. e. the hardness a piano key is
                        struck with.
        seconds (int): Time fraction of the song to be displayed. When
                       set to -1, the full length is taken.

    Returns:
        display.Audio: Song as an object allowing for display.
    """

    # Song map -> PrettyMIDI object (written to out_file when one is given)
    midi_song = map_to_wav(model.generate(z_sample), out_file, velocity)

    # PrettyMIDI object -> audio widget
    return display_audio(midi_song, seconds)