"""Helpers to convert between MIDI files and normalized "song maps" for the VAE."""

# Extras
import collections

# Get the absolute path of the directory and add it to sys.path in order to
# get the VAE class type
import sys
from pathlib import Path

# Machine learning, flow and data
import tensorflow as tf
import numpy as np
import pandas as pd

# Audio
import pretty_midi

# Displaying
from IPython import display

directory = Path(__file__).resolve().parent
sys.path.insert(0, str(directory))
from model import VAE

_CAP = 3501  # Cap for the number of notes
_SAMPLING_RATE = 16000  # Parameter to pass continuous signal to a discrete one
_INSTRUMENT_NAME = "Acoustic Grand Piano"  # MIDI instrument used
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
)  # Factors used to normalize song maps


def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """
    Convert midi file to "song map" (dataframe where each note is broken
    into its components).

    Only the first instrument of the file is read. Notes are ordered by
    start time, truncated to the first ``_CAP`` notes and divided
    column-wise by ``_SCALING_FACTORS``.

    Parameters:
        midi_file (str): Path to the midi file.

    Returns:
        pd.DataFrame: Nx3 frame with one row per note and the columns
            "pitch", "step" (time since the previous note's start; 0 for
            the first note) and "duration", all normalized. The frame has
            zero rows when the file has no instrument or no notes.
    """
    pm = pretty_midi.PrettyMIDI(midi_file)

    # Guard against files without instruments/notes instead of raising
    # an IndexError on the [0] lookups.
    raw_notes = pm.instruments[0].notes if pm.instruments else []

    # Sort the notes by start time; capping here is equivalent to capping
    # the finished dataframe because a step only depends on earlier notes
    sorted_notes = sorted(raw_notes, key=lambda note: note.start)[:_CAP]

    # Separate each individual note in pitch, step and duration
    starts = [note.start for note in sorted_notes]
    previous_starts = starts[:1] + starts[:-1]  # first note has step 0

    notes_df = pd.DataFrame(
        {
            "pitch": np.array([note.pitch for note in sorted_notes]),
            "step": np.array(
                [cur - prev for prev, cur in zip(previous_starts, starts)]
            ),
            "duration": np.array(
                [note.end - note.start for note in sorted_notes]
            ),
        }
    )

    return notes_df / _SCALING_FACTORS  # Scale


def display_audio(pm: pretty_midi.PrettyMIDI, seconds: float = -1) -> display.Audio:
    """
    Display a song in PrettyMIDI format as a display.Audio object.
    This method specially comes in useful in a jupyter notebook.

    Parameters:
        pm (pretty_midi.PrettyMIDI): PrettyMIDI object containing a song.
        seconds (float): Length, in seconds, of the leading part of the
            song to be displayed. When negative (e.g. -1) or None, the
            full length is taken.

    Returns:
        display.Audio: Song as an object allowing for display.
    """
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)

    # Take a sample of the generated waveform to mitigate kernel resets
    if seconds is None or seconds < 0:
        waveform_short = waveform
    else:
        # int() keeps the slice bound valid when seconds is a float
        waveform_short = waveform[: int(seconds * _SAMPLING_RATE)]

    return display.Audio(waveform_short, rate=_SAMPLING_RATE)


def map_to_wav(song_map: pd.DataFrame, out_file: str = None,
               velocity: int = 50) -> pretty_midi.PrettyMIDI:
    """
    Convert "song map" to midi file (reverse process with respect to
    midi_to_notes) and (optionally) save it, generating a PrettyMIDI
    object in the process.

    Parameters:
        song_map (pd.DataFrame): 3xN matrix where each column is a note,
            composed of pitch, step and duration (in that row order).
            It is passed through ``tf.squeeze`` and ``.numpy()``, so a
            tensor with extra singleton dimensions is accepted as well.
            NOTE(review): this is the transpose of what midi_to_notes
            returns - confirm against the model's output layout.
        out_file (str): Path or file to write .mid file to. If None, no
            saving is done.
        velocity: Note loudness, i. e. the hardness a piano key is struck
            with.

    Returns:
        pretty_midi.PrettyMIDI: PrettyMIDI object containing the song's
            representation.
    """
    # Get song map as dataframe
    contracted_map = tf.squeeze(song_map).numpy()
    if contracted_map.ndim == 1:
        # A single-note map is squeezed down to 1-D; restore the note axis
        contracted_map = contracted_map[:, np.newaxis]

    notes = pd.DataFrame(
        contracted_map.T, columns=["pitch", "step", "duration"]
    ).mul(_SCALING_FACTORS, axis=1)
    notes["pitch"] = notes["pitch"].astype('int32').clip(1, 127)

    # Instantiate PrettyMIDI object and append notes
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(_INSTRUMENT_NAME)
    )

    prev_start = 0.0
    for pitch, step, duration in zip(
        notes["pitch"], notes["step"], notes["duration"]
    ):
        # Work in Python floats so the running sum is not done in a
        # narrower numpy dtype
        step = float(step)
        duration = float(duration)

        # The VAE might generate notes with negative step and duration,
        # and we therefore need to make sure to skip these anomalies.
        # Written as "not (>= 0)" so NaN values are skipped too instead of
        # contaminating prev_start for every following note.
        if not (step >= 0 and duration >= 0):
            continue

        start = prev_start + step
        end = start + duration
        instrument.notes.append(
            pretty_midi.Note(
                velocity=velocity,
                pitch=int(pitch),
                start=start,
                end=end,
            )
        )
        prev_start = start

    pm.instruments.append(instrument)

    # If a path was specified, save as midi file
    if out_file:
        pm.write(out_file)

    return pm


def generate_and_display(model: VAE, out_file: str = None,
                         z_sample: tf.Tensor = None, velocity: int = 50,
                         seconds: int = -1) -> display.Audio:
    """
    Generate a song, (optionally) save it and display it.

    Parameters:
        model (VAE): Instance of VAE to generate the song with.
        out_file (str): Path or file to write .mid file to. If None, no
            saving is done.
        z_sample (tf.Tensor): Song encoding used to generate a song. If
            None, generate an unconditioned piece.
        velocity: Note loudness, i. e. the hardness a piano key is struck
            with.
        seconds (int): Length, in seconds, of the leading part of the
            song to be displayed. When set to -1, the full length is
            taken.

    Returns:
        display.Audio: Song as an object allowing for display.
    """
    song_map = model.generate(z_sample)
    wav = map_to_wav(song_map, out_file, velocity)

    return display_audio(wav, seconds)