# Machine learning and data handling
import tensorflow as tf
import numpy as np
import pandas as pd
# Audio
import pretty_midi
# Displaying
from IPython import display
# Add this file's directory to sys.path so that the local VAE class
# can be imported
import sys
from pathlib import Path
directory = Path(__file__).resolve().parent
sys.path.insert(0, str(directory))
from model import VAE
# Extras
import collections
_CAP = 3501  # Maximum number of notes per song (fixed by the model's architecture)
_SAMPLING_RATE = 16000  # Sampling rate (Hz) used to synthesize the audio waveform
_INSTRUMENT_NAME = "Acoustic Grand Piano"  # MIDI instrument used
_SCALING_FACTORS = pd.Series(
{"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
) # Factors used to normalize song maps
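# Normalization is an elementwise division by these factors: for example, a
# pitch of 64 divided by 64.024558 maps to roughly 1.0, and notes_to_midi
# multiplies by the same factors to undo the scaling.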
def midi_to_notes(midi_file: str) -> pd.DataFrame:
"""
    Convert a MIDI file to a "song map": a dataframe in which each note is
    broken into its pitch, step and duration components. The song must
    contain at least ``_CAP`` (3501) notes.
Parameters
----------
midi_file : str
Path to the midi file.
Returns
-------
    song_map : pd.DataFrame
        3xN matrix where each column is a note, composed of pitch, step
        and duration.
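
    Examples
    --------
    A minimal sketch; ``some_song.mid`` is a hypothetical file containing
    at least 3501 notes::

        song_map = midi_to_notes("some_song.mid")
        song_map.shape  # (3, 3501): rows are pitch, step and duration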
"""
pm = pretty_midi.PrettyMIDI(midi_file)
instrument = pm.instruments[0]
notes = collections.defaultdict(list)
# Sort the notes by start time
sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
    if len(sorted_notes) < _CAP:
        raise ValueError(f"Song must have at least {_CAP} notes.")
prev_start = sorted_notes[0].start
# Separate each individual note in pitch, step and duration
for note in sorted_notes:
start = note.start
end = note.end
notes['pitch'].append(note.pitch)
notes['step'].append(start - prev_start)
notes['duration'].append(end - start)
prev_start = start
# Put notes in a dataframe
notes_df = pd.DataFrame({name: np.array(value) for name, value in notes.items()})
notes_df = notes_df[:_CAP] # Cap the song to match the model's architecture
    song_map = (notes_df / _SCALING_FACTORS).T  # Normalize and transpose to 3xN
return song_map
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=-1) -> display.Audio:
"""
Display a song in PrettyMIDI format as a display.Audio object.
    This function is especially useful in a Jupyter notebook.
Parameters
----------
    pm : pretty_midi.PrettyMIDI
        PrettyMIDI object containing a song.
    seconds : int
        Number of seconds of the song to be played back.
        Default ``-1``, for which the full length is taken.
Returns
-------
display_obj : display.Audio
Song as an object allowing for display.
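
    Examples
    --------
    A minimal sketch; assumes ``pm`` is a PrettyMIDI song and that
    FluidSynth (with a soundfont) is available to ``pm.fluidsynth``::

        display_audio(pm, seconds=30)  # play only the first 30 seconds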
"""
waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
# Take a sample of the generated waveform to mitigate kernel resets
if seconds == -1:
waveform_short = waveform[:]
else:
        waveform_short = waveform[:seconds * _SAMPLING_RATE]
display_obj = display.Audio(waveform_short, rate=_SAMPLING_RATE)
return display_obj
def notes_to_midi(song_map: pd.DataFrame, out_file: str, velocity: int=50) -> pretty_midi.PrettyMIDI:
"""
Convert "song map" to midi file (reverse process with respect to
midi_to_notes) and (optionally) save it, generating a PrettyMidi object in the process.
Parameters
----------
song_map : pd.DataFrame
        3xN matrix where each column is a note, composed of pitch, step
        and duration.
out_file : str
Path or file to write .mid file to. If None, no saving is done.
velocity : int
        Note loudness, i.e., how hard a piano key is struck.
Default ``50``.
Returns
-------
pm : pretty_midi.PrettyMIDI
PrettyMIDI object containing the song's representation.
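
    Examples
    --------
    A minimal round-trip sketch; ``some_song.mid`` is a hypothetical input
    file with at least 3501 notes::

        song_map = midi_to_notes("some_song.mid")
        pm = notes_to_midi(song_map, "reconstruction.mid")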
"""
    # Drop any singleton batch dimension, transpose back to Nx3 and
    # undo the normalization applied in midi_to_notes
    contracted_map = tf.squeeze(song_map)
    song_map_T = contracted_map.numpy().T
    notes = pd.DataFrame(song_map_T, columns=["pitch", "step", "duration"]).mul(_SCALING_FACTORS, axis=1)
    # Cast pitches to integers and clip them into the MIDI pitch range
    notes["pitch"] = notes["pitch"].astype('int32').clip(1, 127)
# Instantiate PrettyMIDI object and append notes
pm = pretty_midi.PrettyMIDI()
instrument = pretty_midi.Instrument(
program=pretty_midi.instrument_name_to_program(
_INSTRUMENT_NAME))
prev_start = 0
    for _, note in notes.iterrows():
        # The VAE may generate notes with a negative step or duration;
        # skip these anomalies
        if note['step'] < 0 or note['duration'] < 0:
continue
start = float(prev_start + note['step'])
end = float(start + note['duration'])
        midi_note = pretty_midi.Note(
            velocity=velocity,
            pitch=int(note['pitch']),
            start=start,
            end=end,
        )
        instrument.notes.append(midi_note)
prev_start = start
pm.instruments.append(instrument)
# If a path was specified, save as midi file
if out_file:
pm.write(out_file)
return pm
def generate_and_display(model: VAE,
out_file: str=None,
z_sample: tf.Tensor=None,
velocity: int=50,
seconds: int=-1) -> display.Audio:
"""
Generate a song, (optionally) save it and display it.
Parameters
----------
model : VAE
Instance of VAE to generate the song with.
out_file : str
Path or file to write .mid file to.
Default ``None``, for which no saving is done.
z_sample : tf.Tensor
Song encoding used to generate a song.
Default ``None``, for which an unconditioned piece is generated.
velocity : int
        Note loudness, i.e., how hard a piano key is struck.
Default ``50``.
    seconds : int
        Number of seconds of the song to be played back.
        Default ``-1``, for which the full length is taken.
Returns
-------
display_obj : display.Audio
Song as an object allowing for display.
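
    Examples
    --------
    A minimal sketch; ``model`` is a trained VAE and ``latent_dim`` is a
    stand-in for its latent size (both assumptions here)::

        z = tf.random.normal([1, latent_dim])
        generate_and_display(model, out_file="generated.mid", z_sample=z)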
"""
    song_map = model.decode(z_sample)
    pm = notes_to_midi(song_map, out_file, velocity)
    display_obj = display_audio(pm, seconds)
    return display_obj
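

# A minimal end-to-end sketch, guarded so that importing this module stays
# side-effect free. The paths are hypothetical: "example_song.mid" must
# exist and contain at least 3501 notes.
if __name__ == "__main__":
    song_map = midi_to_notes("example_song.mid")   # hypothetical input file
    pm = notes_to_midi(song_map, "roundtrip.mid")  # save the reconstruction
    display_audio(pm, seconds=10)                  # audible in a notebook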