|
# Maximum number of notes kept per file; midi_to_notes truncates to this many rows.
_CAP = 3501

# Sample rate passed to the synthesizer (``fs``) and to the audio player (``rate``).
_SAMPLING_RATE = 16000

# Instrument name resolved to a MIDI program number when notes are exported.
_INSTRUMENT_NAME = "Acoustic Grand Piano"

# Per-column scale: note tables are divided by these values when loaded and
# multiplied by them again when exported.
# NOTE(review): the values look like dataset statistics - confirm their origin
# before changing them.
_SCALING_FACTORS = pd.Series(
    {"pitch": 64.024558, "step": 0.101410, "duration": 0.199386}
)
|
|
|
def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """Read a MIDI file and return its first instrument's notes as a scaled table.

    Notes are ordered by start time. Each row holds the note's ``pitch``, its
    ``step`` (start time minus the previous note's start time, 0 for the first
    note) and its ``duration`` (end minus start). At most ``_CAP`` rows are
    kept, and every column is divided by the matching ``_SCALING_FACTORS`` entry.

    Args:
        midi_file: Path handed to ``pretty_midi.PrettyMIDI``.

    Returns:
        A DataFrame with columns ``pitch``, ``step`` and ``duration``. It is
        empty (but keeps those columns) when the file has no instruments or
        the first instrument has no notes.
    """
    pm = pretty_midi.PrettyMIDI(midi_file)
    # Only the first instrument is used; a file without any yields no notes.
    raw_notes = pm.instruments[0].notes if pm.instruments else []

    # A step depends only on the preceding note, so truncating before the
    # columns are built gives the same rows as truncating the finished table.
    ordered = sorted(raw_notes, key=lambda note: note.start)[:_CAP]

    starts = np.array([note.start for note in ordered], dtype=float)
    ends = np.array([note.end for note in ordered], dtype=float)
    notes_df = pd.DataFrame(
        {
            "pitch": np.array([note.pitch for note in ordered], dtype=float),
            # Prepending the first start makes the first step exactly 0.
            "step": np.diff(starts, prepend=starts[:1]),
            "duration": ends - starts,
        }
    )
    return notes_df / _SCALING_FACTORS
|
|
|
|
|
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=120):
    """Synthesize ``pm`` and return an audio widget for its opening section.

    Args:
        pm: Object whose ``fluidsynth(fs=...)`` method produces the waveform.
        seconds: Length of the excerpt to keep, counted from the start of the
            waveform. Fractional values are accepted.

    Returns:
        The ``display.Audio`` object built from the truncated waveform at
        ``_SAMPLING_RATE``.
    """
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
    # Slice bounds must be integers; int() keeps integer inputs unchanged and
    # lets callers pass fractional durations without a TypeError.
    sample_count = int(seconds * _SAMPLING_RATE)
    return display.Audio(waveform[:sample_count], rate=_SAMPLING_RATE)
|
|
|
|
|
|
|
|
|
def map_to_wav(song_map: pd.DataFrame, out_file: str, velocity: int = 100):
    """Turn a scaled note map into a PrettyMIDI object and write it to disk.

    The map is squeezed and transposed so that each row is one note with the
    columns ``pitch``, ``step`` and ``duration``; the squeezed input must
    therefore have exactly three rows in that order. Columns are multiplied by
    ``_SCALING_FACTORS`` to undo the scaling applied when notes were loaded.

    NOTE(review): the annotation says ``pd.DataFrame``, but the value is passed
    to ``tf.squeeze`` and the result must offer ``.numpy()`` - callers appear
    to pass a tensor; confirm and adjust the annotation.
    NOTE(review): despite the name, the output is produced by
    ``PrettyMIDI.write``, which emits a MIDI file rather than WAV audio.
    NOTE(review): ``step`` and ``duration`` are not clamped, so negative values
    produce notes that move backwards in time or end before they start -
    confirm the producer guarantees non-negative values.

    Args:
        song_map: Scaled note map as described above.
        out_file: Path passed to ``PrettyMIDI.write``.
        velocity: Velocity assigned to every generated note.

    Returns:
        The ``pretty_midi.PrettyMIDI`` object that was written.
    """
    rows = tf.squeeze(song_map).numpy().T
    notes = pd.DataFrame(rows, columns=["pitch", "step", "duration"]).mul(
        _SCALING_FACTORS, axis=1
    )

    # Clip while the values are still floating point: casting first lets
    # out-of-range floats wrap around in int32 before the clip sees them.
    pitches = notes["pitch"].clip(1, 127).astype("int32").tolist()
    # Plain Python floats keep the running sums in double precision.
    steps = notes["step"].astype(float).tolist()
    durations = notes["duration"].astype(float).tolist()

    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(_INSTRUMENT_NAME)
    )

    # Each step is relative to the previous note's start, so accumulate them.
    start = 0.0
    for pitch, step, duration in zip(pitches, steps, durations):
        start = float(start + step)
        end = float(start + duration)
        instrument.notes.append(
            pretty_midi.Note(
                velocity=velocity,
                pitch=int(pitch),
                start=start,
                end=end,
            )
        )

    pm.instruments.append(instrument)
    pm.write(out_file)
    return pm
|
|
|
def generate_and_display(out_file, model, z_sample=None, velocity=100, seconds=120):
    """Generate a note map with ``model``, preview it, save it and return audio.

    Args:
        out_file: Path forwarded to ``map_to_wav`` for the written file.
        model: Object whose ``generate(z_sample)`` returns the note map.
        z_sample: Value forwarded unchanged to ``model.generate``.
        velocity: Note velocity forwarded to ``map_to_wav``.
        seconds: Excerpt length forwarded to ``display_audio``.

    Returns:
        Whatever ``display_audio`` returns for the generated piece.
    """
    generated = model.generate(z_sample)

    # Preview only the first 50 columns of the squeezed map.
    preview = tf.squeeze(generated)[:, :50]
    display.display(imshow(preview))

    # map_to_wav both writes out_file and hands back the in-memory object.
    midi = map_to_wav(generated, out_file, velocity)
    return display_audio(midi, seconds)