# Machine learning, flow and data
import tensorflow as tf
import numpy as np
import pandas as pd
from model import VAE
# Audio
import pretty_midi
# Displaying
from IPython import display
# Extras
import collections

_CAP = 3501  # Cap on the number of notes kept per song
_SAMPLING_RATE = 16000  # Sampling rate for converting the continuous waveform into a discrete signal
_INSTRUMENT_NAME = "Acoustic Grand Piano"  # MIDI instrument used
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
)  # Factors used to normalize song maps
def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """
    Convert a MIDI file to a "song map" (a dataframe where each note is broken
    into its components).

    Parameters:
        midi_file (str): Path to the MIDI file.

    Returns:
        pd.DataFrame: Nx3 dataframe where each row is a note, composed of
            pitch, step and duration.
    """
    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[0]
    notes = collections.defaultdict(list)

    # Sort the notes by start time
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
    prev_start = sorted_notes[0].start

    # Split each note into pitch, step and duration
    for note in sorted_notes:
        start = note.start
        end = note.end
        notes['pitch'].append(note.pitch)
        notes['step'].append(start - prev_start)
        notes['duration'].append(end - start)
        prev_start = start

    # Put the notes in a dataframe
    notes_df = pd.DataFrame({name: np.array(value) for name, value in notes.items()})
    notes_df = notes_df[:_CAP]  # Cap the song to match the model's architecture
    return notes_df / _SCALING_FACTORS  # Scale
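# Usage sketch (illustrative, not part of the module's API): "song.mid" below is
# a hypothetical path, not a file shipped with this project.
#
#   song_map = midi_to_notes("song.mid")
#   song_map.head()  # normalized pitch, step and duration columns, one row per note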
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=-1) -> display.Audio:
    """
    Render a song in PrettyMIDI format as a display.Audio object.
    This is especially useful in a Jupyter notebook.

    Parameters:
        pm (pretty_midi.PrettyMIDI): PrettyMIDI object containing a song.
        seconds (int): Number of seconds of the song to render. When
            set to -1, the full length is taken.

    Returns:
        display.Audio: Song as an object allowing for playback.
    """
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
    # Take a sample of the generated waveform to mitigate kernel resets
    if seconds == -1:
        waveform_short = waveform[:]
    else:
        waveform_short = waveform[:seconds * _SAMPLING_RATE]
    return display.Audio(waveform_short, rate=_SAMPLING_RATE)
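# Usage sketch (illustrative): assumes a PrettyMIDI object loaded from a
# hypothetical "song.mid"; pm.fluidsynth() additionally requires the optional
# pyfluidsynth dependency to be installed.
#
#   pm = pretty_midi.PrettyMIDI("song.mid")
#   display_audio(pm, seconds=30)  # render only the first 30 seconds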
def map_to_wav(song_map: pd.DataFrame, out_file: str, velocity: int = 100) -> pretty_midi.PrettyMIDI:
    """
    Convert a "song map" back to MIDI (the reverse process with respect to
    midi_to_notes), generating a PrettyMIDI object in the process and
    (optionally) saving it.

    Parameters:
        song_map (pd.DataFrame): 3xN matrix where each column is a note,
            composed of pitch, step and duration.
        out_file (str): Path or file to write the .mid file to. If None, no saving is done.
        velocity (int): Note loudness, i.e. how hard a piano key is struck.

    Returns:
        pretty_midi.PrettyMIDI: PrettyMIDI object containing the song's representation.
    """
    contracted_map = tf.squeeze(song_map)
    song_map_T = contracted_map.numpy().T
    notes = pd.DataFrame(song_map_T, columns=["pitch", "step", "duration"]).mul(_SCALING_FACTORS, axis=1)
    notes["pitch"] = notes["pitch"].astype('int32').clip(1, 127)

    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(
            _INSTRUMENT_NAME))

    # Rebuild absolute start/end times from the relative step and duration values
    prev_start = 0
    for i, note in notes.iterrows():
        start = float(prev_start + note['step'])
        end = float(start + note['duration'])
        note = pretty_midi.Note(
            velocity=velocity,
            pitch=int(note['pitch']),
            start=start,
            end=end,
        )
        instrument.notes.append(note)
        prev_start = start

    pm.instruments.append(instrument)
    if out_file:
        pm.write(out_file)
    return pm
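# Usage sketch (illustrative): round-trip a normalized song map back to MIDI.
# The transpose reflects that map_to_wav expects the 3xN orientation (rows are
# pitch/step/duration), while midi_to_notes returns one row per note.
# "song.mid" and "rebuilt.mid" are hypothetical paths.
#
#   song_map = midi_to_notes("song.mid")
#   pm = map_to_wav(tf.constant(song_map.to_numpy().T), "rebuilt.mid")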
def generate_and_display(model: VAE,
                         out_file: str = None,
                         z_sample: tf.Tensor = None,
                         velocity: int = 100,
                         seconds: int = 120) -> display.Audio:
    """
    Generate a song, (optionally) save it and display it.

    Parameters:
        model (VAE): Instance of VAE to generate the song with.
        out_file (str): Path or file to write the .mid file to. If None, no saving is done.
        z_sample (tf.Tensor): Song encoding used to generate a song. If None,
            an unconditioned piece is generated.
        velocity (int): Note loudness, i.e. how hard a piano key is struck.
        seconds (int): Number of seconds of the song to render. When
            set to -1, the full length is taken.

    Returns:
        display.Audio: Song as an object allowing for playback.
    """
    song_map = model.generate(z_sample)
    wav = map_to_wav(song_map, out_file, velocity)
    return display_audio(wav, seconds)
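# End-to-end sketch (illustrative): the VAE constructor arguments and checkpoint
# path are hypothetical and depend on how the model is defined and trained
# elsewhere in this project.
#
#   model = VAE(...)                       # build with the project's hyperparameters
#   model.load_weights("checkpoint_path")  # hypothetical checkpoint
#   generate_and_display(model, out_file="generated.mid", seconds=30)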