"Encoding music21 streams -> numpy array -> text"
# import re
# from pathlib import Path
import music21
import numpy as np
BPB = 4  # beats per bar
TIMESIG = f'{BPB}/4'  # default time signature
PIANO_RANGE = (21, 108)  # 88-key range: 21 = A0, 108 = C8
VALTSEP = -1  # separator value for numpy encoding
VALTCONT = -2  # numpy value for TCONT - needed for compressing chord array
SAMPLE_FREQ = 4  # timesteps per quarter note
NOTE_SIZE = 128  # number of midi pitch slots per timestep
DUR_SIZE = (10*BPB*SAMPLE_FREQ)+1  # max duration value + 1 — NOTE(review): formula is 10 bars of timesteps, but the original comment said "8 bars / 16 beats"; confirm intent
MAX_NOTE_DUR = (8*BPB*SAMPLE_FREQ)  # single notes are clamped to 8 bars
# Encoding process
# 1. midi -> music21.Stream
# 2. Stream -> numpy chord array (timestep X instrument X noterange)
# 3. numpy array -> List[Timestep][NoteEnc]
def midi2npenc(midi_file, skip_last_rest=True):
    "Converts midi file to numpy encoding for language model"
    parsed = file2stream(midi_file)        # 1. midi -> Stream
    chords = stream2chordarr(parsed)       # 2. Stream -> chord array
    # 3. chord array -> npenc rows
    return chordarr2npenc(chords, skip_last_rest=skip_last_rest)
# Decoding process
# 1. NoteEnc -> numpy chord array
# 2. numpy array -> music21.Stream
def npenc2stream(arr, bpm=120):
    "Converts numpy encoding to music21 stream"
    chords = npenc2chordarr(np.array(arr))  # 1. npenc -> chord array
    result = chordarr2stream(chords, bpm=bpm)  # 2. chord array -> Stream
    return result
##### ENCODING ######

# 1. File To Stream
def file2stream(fp):
    "Parse a midi file path (or an already-loaded MidiFile) into a music21 stream."
    if isinstance(fp, music21.midi.MidiFile):
        return music21.midi.translate.midiFileToStream(fp)
    return music21.converter.parse(fp)
# 2.
def stream2chordarr(s, note_size=NOTE_SIZE, sample_freq=SAMPLE_FREQ, max_note_dur=MAX_NOTE_DUR):
    """Converts music21.Stream to a numpy array of shape (timestep, instrument, pitch).

    Each cell holds the note's duration (in timesteps) at its onset and
    VALTCONT for the timesteps where the note is held.
    """
    # assuming 4/4 time
    # note x instrument x pitch
    # FYI: midi middle C value=60
    # (AS) TODO: need to order by instruments most played and filter out percussion or include the channel
    highest_time = max(s.flat.getElementsByClass('Note').highestTime,
                       s.flat.getElementsByClass('Chord').highestTime)
    maxTimeStep = round(highest_time * sample_freq) + 1
    # BUG FIX: previously hard-coded NOTE_SIZE here, silently ignoring the note_size parameter
    score_arr = np.zeros((maxTimeStep, len(s.parts), note_size))

    def note_data(pitch, note):
        # (midi pitch, onset timestep, duration in timesteps)
        return (pitch.midi,
                int(round(note.offset * sample_freq)),
                int(round(note.duration.quarterLength * sample_freq)))

    for idx, part in enumerate(s.parts):
        notes = []
        for elem in part.flat:
            if isinstance(elem, music21.note.Note):
                notes.append(note_data(elem.pitch, elem))
            if isinstance(elem, music21.chord.Chord):
                for p in elem.pitches:
                    notes.append(note_data(p, elem))
        # sort notes by offset (1), duration (2) so that hits are not overwritten and longer notes have priority
        notes_sorted = sorted(notes, key=lambda x: (x[1], x[2]))
        for n in notes_sorted:
            if n is None: continue
            pitch, offset, duration = n
            if max_note_dur is not None and duration > max_note_dur:
                duration = max_note_dur
            score_arr[offset, idx, pitch] = duration
            score_arr[offset+1:offset+duration, idx, pitch] = VALTCONT  # Continue holding note
    return score_arr
def chordarr2npenc(chordarr, skip_last_rest=True):
    "Flatten a (timestep, instrument, pitch) chord array into [[note, dur], ...] rows, with VALTSEP rows marking waits."
    # combine instruments
    encoded = []
    pending_wait = 0
    for timestep in chordarr:
        step_notes = timestep2npenc(timestep)
        if not len(step_notes):
            pending_wait += 1
            continue
        # flush the accumulated rest, then all notes sounding at this step
        if pending_wait > 0:
            encoded.append([VALTSEP, pending_wait])
        encoded.extend(step_notes)
        pending_wait = 1  # next separator counts from this timestep
    if pending_wait > 0 and not skip_last_rest:
        encoded.append([VALTSEP, pending_wait])
    return np.array(encoded, dtype=int).reshape(-1, 2)  # reshaping. Just in case result is empty
# Note: not worrying about overlaps - as notes will still play. just look tied
# http://web.mit.edu/music21/doc/moduleReference/moduleStream.html#music21.stream.Stream.getOverlaps
def timestep2npenc(timestep, note_range=PIANO_RANGE, enc_type=None):
    """Encode one (instrument x pitch) timestep slice into note rows.

    enc_type=None    -> [note, duration]
    enc_type='parts' -> [note, duration, part]
    enc_type='full'  -> [note_class, duration, octave, instrument]

    Raises ValueError for any other enc_type (previously fell through
    and silently returned None).
    """
    notes = []
    for i, n in zip(*timestep.nonzero()):
        d = timestep[i, n]
        if d < 0: continue  # only supporting short duration encoding for now (skips VALTCONT holds)
        if n < note_range[0] or n >= note_range[1]: continue  # must be within midi range
        notes.append([n, d, i])
    notes = sorted(notes, key=lambda x: x[0], reverse=True)  # sort by note (highest to lowest)
    if enc_type is None:
        # note, duration
        return [n[:2] for n in notes]
    if enc_type == 'parts':
        # note, duration, part
        return [n for n in notes]
    if enc_type == 'full':
        # note_class, duration, octave, instrument
        return [[n % 12, d, n // 12, i] for n, d, i in notes]
    raise ValueError(f"Unknown enc_type: {enc_type!r}")
##### DECODING #####

# 1.
def npenc2chordarr(npenc, note_size=NOTE_SIZE):
    """Expand npenc rows back into a (timestep, instrument, pitch) chord array.

    Rows are [note, dur] or [note, dur, instrument]; VALTSEP rows advance time.
    """
    # BUG FIX: `len(npenc.shape) <= 2` is true for ANY 2-D encoding, so the
    # instrument column was never inspected — check the column count instead.
    # Also +1: the max instrument INDEX needs max+1 slots to be addressable below.
    num_instruments = 1 if npenc.shape[-1] <= 2 else int(npenc.max(axis=0)[-1]) + 1
    max_len = npenc_len(npenc)
    # score_arr = (steps, inst, note)
    score_arr = np.zeros((max_len, num_instruments, note_size))
    idx = 0
    for step in npenc:
        n, d, i = (step.tolist() + [0])[:3]  # pad missing instrument column with 0
        if n < VALTSEP: continue  # special token
        if n == VALTSEP:
            idx += d  # rest row: advance the timestep cursor
            continue
        score_arr[idx, i, n] = d
    return score_arr
def npenc_len(npenc):
    "Total length in timesteps implied by the encoding's separator rows."
    total = sum(row[1] for row in npenc if row[0] == VALTSEP)
    return total + 1
# 2.
def chordarr2stream(arr, sample_freq=SAMPLE_FREQ, bpm=120):
    "Rebuild a music21 Score from a (timestep, instrument, pitch) chord array."
    step_dur = music21.duration.Duration(1. / sample_freq)
    score = music21.stream.Score()
    score.append(music21.meter.TimeSignature(TIMESIG))
    score.append(music21.tempo.MetronomeMark(number=bpm))
    score.append(music21.key.KeySignature(0))
    for inst_idx in range(arr.shape[1]):
        score.append(partarr2stream(arr[:, inst_idx, :], step_dur))
    return score.transpose(0)
# 2b.
def partarr2stream(partarr, duration):
    "convert instrument part to music21 chords"
    piano_part = music21.stream.Part()
    piano_part.append(music21.instrument.Piano())
    # notes already have duration calculated
    part_append_duration_notes(partarr, duration, piano_part)
    return piano_part
def part_append_duration_notes(partarr, duration, stream):
    "Insert notes/chords from a (timestep, pitch) part array into the given stream."
    quarter_len = duration.quarterLength
    for tidx, timestep in enumerate(partarr):
        pitches = np.where(timestep > 0)[0]  # filter out any negative values (continuous mode)
        if len(pitches) == 0: continue
        notes = []
        for pidx in pitches:
            note = music21.note.Note(pidx)
            note.duration = music21.duration.Duration(partarr[tidx, pidx] * quarter_len)
            notes.append(note)
        offset = tidx * quarter_len
        for grp in group_notes_by_duration(notes):
            # a lone note is inserted directly; same-duration groups become one chord
            element = grp[0] if len(grp) == 1 else music21.chord.Chord(grp)
            stream.insert(offset, element)
    return stream
from itertools import groupby

# combining notes with different durations into a single chord may overwrite conflicting durations. Example: aylictal/still-waters-run-deep
def group_notes_by_duration(notes):
    "Separate notes into chord groups sharing the same quarter-note duration."
    def get_dur(note):
        return note.duration.quarterLength
    by_duration = sorted(notes, key=get_dur)
    return [list(grp) for _, grp in groupby(by_duration, get_dur)]
# Midi -> npenc Conversion helpers
def is_valid_npenc(npenc, note_range=PIANO_RANGE, max_dur=DUR_SIZE,
                   min_notes=32, input_path=None, verbose=True):
    """Validate an npenc array: long enough, all durations under max_dur,
    and all note values inside note_range. Returns bool."""
    if len(npenc) < min_notes:
        if verbose: print('Sequence too short:', len(npenc), input_path)
        return False
    if (npenc[:,1] >= max_dur).any():
        if verbose: print(f'npenc exceeds max {max_dur} duration:', npenc[:,1].max(), input_path)
        return False
    # https://en.wikipedia.org/wiki/Scientific_pitch_notation - 88 key range - 21 = A0, 108 = C8
    if ((npenc[...,0] > VALTSEP) & ((npenc[...,0] < note_range[0]) | (npenc[...,0] >= note_range[1]))).any():
        # BUG FIX: this print was unconditional; honor the verbose flag like the other checks
        if verbose: print(f'npenc out of piano note range {note_range}:', input_path)
        return False
    return True
# separates overlapping notes to different tracks
def remove_overlaps(stream, separate_chords=True):
    "Split overlapping notes onto separate tracks; optionally split melody from chords."
    if separate_chords:
        return separate_melody_chord(stream)
    return stream.flat.makeVoices().voicesToParts()
# separates notes and chords to different tracks
def separate_melody_chord(stream):
    "Build a new Score with single notes in one part and chords in another."
    score = music21.stream.Score()
    if stream.timeSignature: score.append(stream.timeSignature)
    score.append(stream.metronomeMarkBoundaries()[0][-1])
    if stream.keySignature: score.append(stream.keySignature)

    melody = music21.stream.Part(stream.flat.getElementsByClass('Note'))
    melody.insert(0, stream.getInstrument())
    chords = music21.stream.Part(stream.flat.getElementsByClass('Chord'))
    chords.insert(0, stream.getInstrument())

    score.append(melody)
    score.append(chords)
    return score
# processing functions for sanitizing data
def compress_chordarr(chordarr):
    "Trim leading/trailing rests, then shorten long internal rests."
    trimmed = trim_chordarr_rests(chordarr)
    return shorten_chordarr_rests(trimmed)
def trim_chordarr_rests(arr, max_rests=4, sample_freq=SAMPLE_FREQ):
    "Trim silent timesteps from both ends, rounded down to max_rests quarter-note units."
    # max rests is in quarter notes
    # max 1 bar between song start and end
    max_sample = max_rests * sample_freq

    leading = 0  # count of silent timesteps at the start
    for idx, timestep in enumerate(arr):
        if (timestep != 0).any(): break
        leading = idx + 1

    trailing = 0  # count of silent timesteps at the end
    for idx, timestep in enumerate(reversed(arr)):
        if (timestep != 0).any(): break
        trailing = idx + 1

    # round each trim down to a max_sample boundary so some rest padding remains
    leading -= leading % max_sample
    trailing -= trailing % max_sample
    # if leading > 0 or trailing > 0: print('Trimming rests. Start, end:', leading, len(arr)-trailing, trailing)
    return arr[leading:(len(arr) - trailing)]
def shorten_chordarr_rests(arr, max_rests=8, sample_freq=SAMPLE_FREQ):
    "Cap internal runs of silent timesteps at roughly max_rests quarter notes."
    # max rests is in quarter notes
    # max 2 bar pause
    max_sample = max_rests * sample_freq
    result = []
    rest_count = 0
    for timestep in arr:
        if (timestep == 0).all():
            rest_count += 1
            continue
        if rest_count > max_sample:
            # keep the sub-beat remainder so downstream beats stay aligned
            rest_count = (rest_count % sample_freq) + max_sample
        result.extend(np.zeros(timestep.shape) for _ in range(rest_count))
        rest_count = 0
        result.append(timestep)
    # trailing rests are flushed uncompressed, matching the original flow
    result.extend(np.zeros(timestep.shape) for _ in range(rest_count))
    return np.array(result)
# sequence 2 sequence convenience functions
def stream2npenc_parts(stream, sort_pitch=True):
    "Encode each part of a stream separately, optionally sorted by average pitch (highest first)."
    chordarr = stream2chordarr(stream)
    num_parts = chordarr.shape[1]
    parts = [part_enc(chordarr, i) for i in range(num_parts)]
    if sort_pitch:
        return sorted(parts, key=avg_pitch, reverse=True)
    return parts
def chordarr_combine_parts(parts):
    "Pad all part arrays to the longest timestep count and stack them along the instrument axis."
    longest = max(p.shape[0] for p in parts)
    padded = [pad_part_to(p, longest) for p in parts]
    return np.concatenate(padded, axis=1)
def pad_part_to(p, target_size):
    "Zero-pad a (timestep, part, note) array along axis 0 up to target_size timesteps."
    extra = target_size - p.shape[0]
    return np.pad(p, ((0, extra), (0, 0), (0, 0)), 'constant')
def part_enc(chordarr, part):
    "Encode a single instrument slice of a chord array as npenc rows."
    part_slice = chordarr[:, part:part+1, :]
    return chordarr2npenc(part_slice)
def avg_tempo(t, sep_idx=VALTSEP):
    "Average separator (rest) duration per row, bucketed into an 'mt<n>' tempo token."
    # NOTE(review): MTEMPO_SIZE is not defined anywhere in this file — presumably
    # supplied by a sibling module; confirm before calling.
    sep_durations = t[t[:, 0] == sep_idx][:, 1]
    avg = sep_durations.sum() / t.shape[0]
    bucket = int(round(avg / SAMPLE_FREQ))
    return 'mt' + str(min(bucket, MTEMPO_SIZE - 1))
def avg_pitch(t, sep_idx=VALTSEP):
    "Mean of the note column over rows that are actual notes (first column > sep_idx)."
    note_rows = t[t[:, 0] > sep_idx]
    return note_rows[:, 0].mean()