# Machine learning and data handling
import tensorflow as tf
import numpy as np
import pandas as pd
from model import VAE

# Audio
import pretty_midi

# Displaying
from IPython import display

# Extras
import collections


_CAP = 3501 # Maximum number of notes per song, fixed by the model's architecture
_SAMPLING_RATE = 16000 # Sampling rate (Hz) used to synthesize the audio waveform
_INSTRUMENT_NAME = "Acoustic Grand Piano" # MIDI instrument used
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
) # Factors used to normalize song maps
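
# Illustration of the normalization these factors perform (the raw values
# below are made up for the example):
#   raw = pd.Series({"pitch": 60.0, "step": 0.1, "duration": 0.2})
#   raw / _SCALING_FACTORS  # -> pitch ~0.94, step ~0.99, duration ~1.00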

def midi_to_notes(midi_file: str) -> pd.DataFrame:
  """
  Convert midi file to "song map" (dataframe where each note is broken
  into its components)

  Parameters:
  midi_file (str): Path to the midi file.

  Returns:
  pd.Dataframe: 3xN matrix where each column is a note, composed of
  pitch, duration and step.
  """
    
  pm = pretty_midi.PrettyMIDI(midi_file)
  instrument = pm.instruments[0]
  notes = collections.defaultdict(list)

  # Sort the notes by start time
  sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
  prev_start = sorted_notes[0].start

  # Separate each individual note in pitch, step and duration
  for note in sorted_notes:
    start = note.start
    end = note.end
    notes['pitch'].append(note.pitch)
    notes['step'].append(start - prev_start)
    notes['duration'].append(end - start)
    prev_start = start

  # Put notes in a dataframe
  notes_df = pd.DataFrame({name: np.array(value) for name, value in notes.items()})
  notes_df = notes_df[:_CAP] # Cap the song to match the model's architecture
  return notes_df / _SCALING_FACTORS # Scale
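
# Example (the path is illustrative, not part of this repository):
#   notes = midi_to_notes("songs/example.mid")
#   notes.head()  # one row per note: normalized pitch, step and duration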


def display_audio(pm: pretty_midi.PrettyMIDI, seconds: int = -1) -> display.Audio:
  """
  Display a song in PrettyMIDI format as a display.Audio object.
  This function is especially useful in a Jupyter notebook.

  Parameters:
  pm (pretty_midi.PrettyMIDI): PrettyMIDI object containing a song.
  seconds (int): Number of seconds of the song to be displayed. When
                 set to -1, the full length is taken.

  Returns:
  display.Audio: Song as an object allowing for display.
  """

  waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
  # Take a sample of the generated waveform to mitigate kernel resets
  if seconds == -1:
    waveform_short = waveform[:]
  else:
    waveform_short = waveform[:seconds * _SAMPLING_RATE]
  
  return display.Audio(waveform_short, rate=_SAMPLING_RATE)
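
# Example (pretty_midi's fluidsynth() requires a FluidSynth install;
# `pm` can come from map_to_wav below or from pretty_midi.PrettyMIDI(path)):
#   display_audio(pm, seconds=30)  # play only the first 30 seconds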
    

def map_to_wav(song_map: pd.DataFrame, out_file: str, velocity: int=100) -> pretty_midi.PrettyMIDI:
  """
  Convert "song map" to midi file (reverse process with respect to 
  midi_to_notes) and (optionally) save it, generating a PrettyMidi object in the process.

  Parameters:
  song_map (pd.DataFrame): 3xN matrix where each column is a note, composed of
  pitch, duration and step.
  out_file (str): Path or file to write .mid file to. If None, no saving is done.
  velocity: Note loudness, i. e. the hardness a piano key is struck with.

  Returns:
  pretty_midi.PrettyMIDI: PrettyMIDI object containing the song's representation.
  """
    
  contracted_map = tf.squeeze(song_map)
  song_map_T = contracted_map.numpy().T
  notes = pd.DataFrame(song_map_T, columns=["pitch", "step", "duration"]).mul(_SCALING_FACTORS, axis=1)
  notes["pitch"] = notes["pitch"].astype('int32').clip(1, 127)

  pm = pretty_midi.PrettyMIDI()
  instrument = pretty_midi.Instrument(
      program=pretty_midi.instrument_name_to_program(
          _INSTRUMENT_NAME))

  prev_start = 0
  for _, row in notes.iterrows():
    start = float(prev_start + row['step'])
    end = float(start + row['duration'])
    note = pretty_midi.Note(
        velocity=velocity,
        pitch=int(row['pitch']),
        start=start,
        end=end,
    )
    instrument.notes.append(note)
    prev_start = start

  pm.instruments.append(instrument)
  if out_file:
    pm.write(out_file)
  return pm
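
# Example round trip (the path is illustrative; assumes the file exists):
#   notes = midi_to_notes("songs/example.mid")      # N x 3, normalized
#   song_map = tf.constant(notes.to_numpy().T)      # 3 x N, as the model emits
#   pm = map_to_wav(song_map, "reconstructed.mid")  # denormalize and write midi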

def generate_and_display(model: VAE,
                         out_file: str = None,
                         z_sample: tf.Tensor = None,
                         velocity: int = 100,
                         seconds: int = 120) -> display.Audio:
  """
  Generate a song, (optionally) save it and display it.

  Parameters:
  model (VAE): Instance of VAE to generate the song with.
  out_file (str): Path or file to write .mid file to. If None, no saving is done.
  z_sample (tf.Tensor): Song encoding used to generate a song. If None, perform
                        generate an unconditioned piece.
  velocity: Note loudness, i. e. the hardness a piano key is struck with.
  seconds (int): Time fraction of the song to be displayed. When
                 set to -1, the full length is taken.

  Returns:
  display.Audio: Song as an object allowing for display.
  """
    
  song_map = model.generate(z_sample)
  pm = map_to_wav(song_map, out_file, velocity)

  return display_audio(pm, seconds)
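

# End-to-end sketch (hypothetical: how the trained VAE is constructed or
# restored depends on model.py; the arguments below are placeholders):
#   vae = VAE(...)  # a trained instance
#   generate_and_display(vae, out_file="generated.mid", seconds=30)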