File size: 5,838 Bytes
22a2b4f
 
9b5281a
 
 
 
 
 
 
 
 
13c33d8
 
 
 
 
 
 
 
 
 
9b5281a
 
 
 
ac7d960
11cac8a
 
 
 
 
 
 
9ec6403
 
3bf476b
9ec6403
3bf476b
 
 
 
 
9ec6403
3bf476b
 
 
 
9ec6403
ac7d960
11cac8a
 
 
 
 
 
 
 
ac7d960
11cac8a
 
 
 
 
 
 
 
 
ac7d960
 
 
3bf476b
 
11cac8a
 
edac276
9ec6403
 
 
 
3bf476b
 
 
 
 
 
 
 
 
 
 
 
 
9ec6403
 
11cac8a
 
9ec6403
 
 
 
 
3bf476b
 
 
e878ec7
11cac8a
f97762f
9ec6403
edac276
 
9ec6403
3bf476b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ec6403
ebc3f07
 
11cac8a
 
 
 
 
ebc3f07
11cac8a
 
 
ac7d960
11cac8a
 
 
ebc3f07
 
 
 
 
11cac8a
 
 
 
 
 
 
 
 
 
 
 
ebc3f07
 
 
edac276
11cac8a
 
e878ec7
 
 
f97762f
bd1f4bf
edac276
e878ec7
edac276
3bf476b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edac276
 
11cac8a
ac7d960
3bf476b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# Machine learning, flow and data
import tensorflow as tf
import numpy as np
import pandas as pd

# Audio
import pretty_midi

# Displaying
from IPython import display

# Get the absolute path of the directory and add it to sys.path in order to
# get the VAE class type
import sys
from pathlib import Path

directory = Path(__file__).resolve().parent
sys.path.insert(0, str(directory))

from model import VAE

# Extras
import collections


# Maximum number of notes kept per song; midi_to_notes truncates to this length
_CAP = 3501
# Sample rate (Hz) used when synthesizing a waveform and when playing it back
_SAMPLING_RATE = 16000
# General MIDI instrument name assigned to the track built by map_to_wav
_INSTRUMENT_NAME = "Acoustic Grand Piano"
# Per-column factors: midi_to_notes divides by them to normalize a song map,
# map_to_wav multiplies by them to undo the normalization
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
)

def midi_to_notes(midi_file: str) -> pd.DataFrame:
  """
  Convert midi file to "song map" (dataframe where each note is broken
  into its components).

  Only the first instrument track of the file is read. Notes are sorted by
  start time, truncated to the first ``_CAP`` notes and normalized by
  dividing each column by ``_SCALING_FACTORS``.

  Parameters
  ----------

  midi_file : str
      Path to the midi file.

  Returns
  -------
  song_map : pd.DataFrame
      Nx3 dataframe with one row per note and the columns ``pitch``,
      ``step`` (time since the previous note's start; ``0`` for the first
      note) and ``duration`` (end minus start), all scaled.
      If the file has no instrument track, or its first track has no notes,
      an empty dataframe with those three columns is returned.
  """

  pm = pretty_midi.PrettyMIDI(midi_file)

  # A file without any track is handled like a track without notes, instead
  # of failing with an IndexError on instruments[0]
  track_notes = pm.instruments[0].notes if pm.instruments else []

  # Sort the notes by start time and cap the song right away to match the
  # model's architecture (each step only depends on the preceding note, so
  # capping before the conversion gives the same rows as capping after it)
  sorted_notes = sorted(track_notes, key=lambda note: note.start)[:_CAP]

  # Pair every start with the start before it; the first note is paired with
  # itself so that its step is 0
  starts = [note.start for note in sorted_notes]
  steps = [cur - prev for prev, cur in zip(starts[:1] + starts, starts)]

  # Put notes in a dataframe; the columns are always present, even when empty
  notes_df = pd.DataFrame({
      "pitch": np.array([note.pitch for note in sorted_notes]),
      "step": np.array(steps),
      "duration": np.array([note.end - note.start for note in sorted_notes]),
  })
  song_map = notes_df / _SCALING_FACTORS # Scale
  return song_map


def display_audio(pm: pretty_midi.PrettyMIDI, seconds=-1) -> display.Audio:
  """
  Display a song in PrettyMIDI format as a display.Audio object.
  This method specially comes in useful in a jupyter notebook.

  Parameters
  ----------

  pm : pretty_midi.PrettyMIDI
      PrettyMidi object containing a song.
  seconds : int or float
      Length, in seconds, of the leading part of the song to be displayed.
      Fractional values are accepted; the resulting sample count is
      truncated to an integer.
      Default ``-1``, for which the full length is taken (``None`` is
      treated the same way).

  Returns
  -------
  display_obj : display.Audio
      Song as an object allowing for display.
  """

  # Synthesize the song at the module-wide sampling rate
  waveform = pm.fluidsynth(fs=_SAMPLING_RATE)

  # Take a sample of the generated waveform to mitigate kernel resets
  if seconds is None or seconds == -1:
    waveform_short = waveform
  else:
    # A slice bound must be an integer, so a fractional number of seconds
    # is converted to a whole number of samples
    waveform_short = waveform[:int(seconds * _SAMPLING_RATE)]

  display_obj = display.Audio(waveform_short, rate=_SAMPLING_RATE)

  return display_obj
    

def map_to_wav(song_map: pd.DataFrame, out_file: str=None, velocity: int=50) -> pretty_midi.PrettyMIDI:
  """
  Convert "song map" to midi file (reverse process with respect to
  midi_to_notes) and (optionally) save it, generating a PrettyMidi object in the process.

  Parameters
  ----------

  song_map : pd.DataFrame
      Scaled song map. After singleton axes are squeezed away it must be a
      3xN matrix where each column is a note and the rows are, in order,
      pitch, step and duration. Anything ``tf.squeeze`` accepts works
      (presumably the tensor produced by the model -- confirm with callers).
  out_file : str
      Path or file to write .mid file to.
      Default ``None``, for which no saving is done.
  velocity : int
      Note loudness, i. e. the hardness a piano key is struck with.
      Default ``50``.

  Returns
  -------

  pm : pretty_midi.PrettyMIDI
      PrettyMIDI object containing the song's representation.
  """

  # Get song map as dataframe: drop singleton axes, then one note per row
  contracted_map = tf.squeeze(song_map).numpy()
  if contracted_map.ndim == 1:
    # A single-note map loses its note axis when squeezed; restore it
    contracted_map = contracted_map[:, np.newaxis]
  notes = pd.DataFrame(
      contracted_map.T, columns=["pitch", "step", "duration"]
  ).mul(_SCALING_FACTORS, axis=1) # Undo the scaling

  # Rows holding NaN or infinite values cannot be turned into notes
  notes = notes[np.isfinite(notes).all(axis=1)]

  # Round to the nearest pitch (truncating would turn e.g. 59.9999 into 59)
  # and clip *before* the integer cast so that out-of-range values cannot
  # overflow it
  pitches = notes["pitch"].round().clip(1, 127).astype('int32')

  # Instantiate PrettyMIDI object and append notes
  pm = pretty_midi.PrettyMIDI()
  instrument = pretty_midi.Instrument(
      program=pretty_midi.instrument_name_to_program(
          _INSTRUMENT_NAME))

  prev_start = 0
  for pitch, step, duration in zip(pitches, notes["step"], notes["duration"]):
    # The VAE might generate notes with negative step and duration,
    # and we therefore need to make sure to skip these anomalies.
    # A skipped note does not move the time cursor forward.
    if step < 0 or duration < 0:
      continue

    start = float(prev_start + step)
    end = float(start + duration)
    instrument.notes.append(
        pretty_midi.Note(
            velocity=velocity,
            pitch=int(pitch),
            start=start,
            end=end,
        )
    )
    prev_start = start

  pm.instruments.append(instrument)

  # If a path was specified, save as midi file
  if out_file:
    pm.write(out_file)
  return pm

def generate_and_display(model: VAE,
                         out_file: str=None,
                         z_sample: tf.Tensor=None,
                         velocity: int=50,
                         seconds: int=-1) -> display.Audio:
  """
  Run the whole pipeline: generate a song map with the model, turn it into
  a PrettyMIDI song (optionally saved to disk) and wrap it for playback.

  Parameters
  ----------
  model : VAE
      Instance of VAE whose ``generate`` method produces the song map.
  out_file : str
      Path or file to write .mid file to.
      Default ``None``, for which no saving is done.
  z_sample : tf.Tensor
      Song encoding, forwarded unchanged to ``model.generate``.
      Default ``None``; how the model treats ``None`` is defined by the VAE
      class (presumably an unconditioned piece -- confirm in model.py).
  velocity : int
      Note loudness, i. e. the hardness a piano key is struck with.
      Default ``50``.
  seconds : int
      Time fraction of the song to be displayed.
      Default ``-1``, for which the full length is taken.

  Returns
  -------
  display_obj : display.Audio
      Song as an object allowing for display.
  """

  # Model output -> PrettyMIDI song (saved when out_file is given)
  generated_map = model.generate(z_sample)
  midi_song = map_to_wav(generated_map, out_file, velocity)

  # PrettyMIDI song -> playable audio widget
  return display_audio(midi_song, seconds)