import collections
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import pretty_midi
import tensorflow as tf
from IPython import display

# Make this file's directory importable so the local model module is found.
directory = Path(__file__).resolve().parent
sys.path.insert(0, str(directory))

from model import VAE

_CAP = 3501
_SAMPLING_RATE = 16000
_INSTRUMENT_NAME = "Acoustic Grand Piano"
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
)

def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """
    Convert a MIDI file to a "song map" (a dataframe where each note is
    broken into its components). The song must have at least ``_CAP``
    (3501) notes.

    Parameters
    ----------
    midi_file : str
        Path to the MIDI file.

    Returns
    -------
    song_map : pd.DataFrame
        3xN matrix where each column is a note, composed of pitch, step and
        duration.
    """

    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[0]
    notes = collections.defaultdict(list)

    # Sort by start time so that "step" (the offset from the previous note's
    # start) is never negative.
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)

    if len(sorted_notes) < _CAP:
        raise ValueError(f"Song must have at least {_CAP} notes.")

    prev_start = sorted_notes[0].start

    for note in sorted_notes:
        start = note.start
        end = note.end
        notes['pitch'].append(note.pitch)
        notes['step'].append(start - prev_start)
        notes['duration'].append(end - start)
        prev_start = start

    notes_df = pd.DataFrame({name: np.array(value) for name, value in notes.items()})
    notes_df = notes_df[:_CAP]
    song_map = (notes_df / _SCALING_FACTORS).T
    return song_map
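
# A minimal usage sketch for midi_to_notes, assuming a local MIDI file named
# "example.mid" with at least _CAP notes (the filename is illustrative, not
# part of this module). Kept as a comment so importing the module has no
# side effects:
#
#   song_map = midi_to_notes("example.mid")
#   song_map.shape   # (3, 3501): rows are pitch, step and duration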


def display_audio(pm: pretty_midi.PrettyMIDI, seconds: int = -1) -> display.Audio:
    """
    Display a song in PrettyMIDI format as a display.Audio object.
    This function is especially useful in a Jupyter notebook.

    Parameters
    ----------
    pm : pretty_midi.PrettyMIDI
        PrettyMIDI object containing a song.
    seconds : int
        Number of seconds of the song to display.
        Default ``-1``, for which the full length is taken.

    Returns
    -------
    display_obj : display.Audio
        Song as an object allowing for display.
    """

    # Synthesise the MIDI to a raw waveform (requires fluidsynth).
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)

    if seconds == -1:
        waveform_short = waveform[:]
    else:
        waveform_short = waveform[:seconds * _SAMPLING_RATE]

    display_obj = display.Audio(waveform_short, rate=_SAMPLING_RATE)

    return display_obj
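
# Notebook sketch for display_audio, assuming fluidsynth is installed and
# "example.mid" exists (both are assumptions, not guaranteed by this module).
# In a Jupyter cell the returned display.Audio renders an inline player:
#
#   pm = pretty_midi.PrettyMIDI("example.mid")
#   display_audio(pm, seconds=30)   # play only the first 30 seconds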


def notes_to_midi(song_map: pd.DataFrame, out_file: str, velocity: int = 50) -> pretty_midi.PrettyMIDI:
    """
    Convert a "song map" to a MIDI file (the reverse process with respect to
    midi_to_notes) and (optionally) save it, generating a PrettyMIDI object
    in the process.

    Parameters
    ----------
    song_map : pd.DataFrame
        3xN matrix where each column is a note, composed of pitch, step and
        duration.
    out_file : str
        Path or file to write the .mid file to. If None, no saving is done.
    velocity : int
        Note loudness, i.e. the hardness a piano key is struck with.
        Default ``50``.

    Returns
    -------
    pm : pretty_midi.PrettyMIDI
        PrettyMIDI object containing the song's representation.
    """

    # Undo the normalisation applied in midi_to_notes and transpose back to
    # one note per row.
    contracted_map = tf.squeeze(song_map)
    song_map_T = contracted_map.numpy().T
    notes = pd.DataFrame(song_map_T, columns=["pitch", "step", "duration"]).mul(_SCALING_FACTORS, axis=1)
    notes["pitch"] = notes["pitch"].astype('int32').clip(1, 127)

    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(_INSTRUMENT_NAME))

    prev_start = 0
    for i, note in notes.iterrows():
        # Skip malformed notes with negative step or duration.
        if note['step'] < 0 or note['duration'] < 0:
            continue

        start = float(prev_start + note['step'])
        end = float(start + note['duration'])
        midi_note = pretty_midi.Note(
            velocity=velocity,
            pitch=int(note['pitch']),
            start=start,
            end=end,
        )
        instrument.notes.append(midi_note)
        prev_start = start

    pm.instruments.append(instrument)

    if out_file:
        pm.write(out_file)
    return pm
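
# Round-trip sketch combining midi_to_notes and notes_to_midi (file names are
# illustrative, not part of this module):
#
#   song_map = midi_to_notes("example.mid")
#   pm = notes_to_midi(song_map, "reconstructed.mid", velocity=50)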


def generate_and_display(model: VAE,
                         out_file: str = None,
                         z_sample: tf.Tensor = None,
                         velocity: int = 50,
                         seconds: int = -1) -> display.Audio:
    """
    Generate a song, (optionally) save it and display it.

    Parameters
    ----------
    model : VAE
        Instance of VAE to generate the song with.
    out_file : str
        Path or file to write the .mid file to.
        Default ``None``, for which no saving is done.
    z_sample : tf.Tensor
        Song encoding used to generate a song.
        Default ``None``, for which an unconditioned piece is generated.
    velocity : int
        Note loudness, i.e. the hardness a piano key is struck with.
        Default ``50``.
    seconds : int
        Number of seconds of the song to display.
        Default ``-1``, for which the full length is taken.

    Returns
    -------
    display_obj : display.Audio
        Song as an object allowing for display.
    """

    # Decode a song map from the latent sample, convert it to MIDI and
    # render it as playable audio.
    song_map = model.decode(z_sample)
    pm = notes_to_midi(song_map, out_file, velocity)
    display_obj = display_audio(pm, seconds)

    return display_obj
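
# End-to-end sketch, assuming a trained VAE instance is already available
# (how the model is built or loaded is defined in model.py and not shown
# here); with z_sample left as None an unconditioned piece is generated:
#
#   display_obj = generate_and_display(model, out_file="generated.mid", seconds=30)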