# Copyright 2024 The YourMT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Please see the details in the LICENSE file.
"""vocabulary.py

Vocabulary for instrument classes. Vocabulary can be used as train_vocab
or test_vocab in data_presets.py or train.py arguments.

Every vocabulary is a dict that maps a class name to the program numbers
(or, for the drum vocabularies, the MIDI note numbers) belonging to it.

    - When it is used as train_vocab, it maps the instrument classes to the
      first program number of the class. For example, if you use
      'GM_INSTR_CLASS' as train_vocab, then the program number of 'Piano' is
      [0,1,2,3,4,5,6,7]. These program numbers are trained as program [0] in
      the model. The ORDER of each list therefore matters: the first entry
      is the representative program of the class.

    - When it is used as test_vocab (i.e. for evaluation), any program
      number in the instrument class is considered as correct.

Instrument vocabularies:
    PIANO_SOLO_CLASS, GUITAR_SOLO_CLASS, BASS_SOLO_CLASS: single-class
        vocabularies.
    SINGING_SOLO_CLASS: singing voice as one class (programs 100 and 101).
    SINGING_CHORUS_SEP_CLASS: singing voice and chorus as separate classes.
    MUSICNET_INSTR_CLASS: 3 classes used for MusicNet benchmark
    GM_INSTR_CLASS: equivalent to 'MIDI Class' defined by MT3 (11 classes).
    GM_INSTR_CLASS_PLUS: GM_INSTR_CLASS + singing voice
    GM_INSTR_EXT_CLASS: 13 classes; piano and guitar are each split in two.
    GM_INSTR_EXT_CLASS_PLUS: GM_INSTR_EXT_CLASS + singing voice + chorus
    GM_INSTR_FULL: one class per GM program 0-95 (96 classes), which is
        extended from 'MT3_FULL'. GM programs 96-127 are not included.
    MT3_FULL: this matches the class names in Table 3 of MT3 paper
        (34 classes).
    MT3_FULL_PLUS: MT3_FULL + singing voice + chorus

Drum vocabularies:
    ENST_DRUM_NOTES: 20 drum notes used in ENST dataset
    EGMD_DRUM_NOTES: 19 drum classes (23 MIDI notes).
    GM_DRUM_NOTES: 19 drum classes (26 MIDI notes) with percussions
    KICK_SNARE_HIHAT: 3 drum classes (kick, snare, hi-hat).

Preset lookup tables:
    drum_vocab_presets: preset name -> drum vocabulary
    program_vocab_presets: preset name -> instrument vocabulary

Program 128 is reserved for 'drum' internally.
Program 129 is reserved for 'unannotated', internally.
Program 100 is reserved for 'singing voice (melody)' in the SINGING_* and
*_PLUS vocabularies.
Program 101 is reserved for 'singing voice (chorus)' in the SINGING_* and
*_PLUS vocabularies.
"""
# yapf: disable
import numpy as np

# --------------------------------------------------------------------------
# Single-instrument vocabularies
# --------------------------------------------------------------------------
PIANO_SOLO_CLASS = {
    "Piano": np.arange(0, 8),
}

GUITAR_SOLO_CLASS = {
    "Guitar": np.arange(24, 32),
}

SINGING_SOLO_CLASS = {
    "Singing Voice": [100, 101],
}

SINGING_CHORUS_SEP_CLASS = {
    "Singing Voice": [100],
    "Singing Voice (chorus)": [101],
}

BASS_SOLO_CLASS = {
    "Bass": np.arange(32, 40),
}

# --------------------------------------------------------------------------
# Multi-instrument vocabularies
# --------------------------------------------------------------------------
MUSICNET_INSTR_CLASS = {
    "Piano": np.arange(0, 8),
    "Strings": np.arange(40, 52),  # Solo strings + ensemble strings
    "Winds": np.arange(64, 80),  # Reed + Pipe
}

GM_INSTR_CLASS = {
    "Piano": np.arange(0, 8),
    "Chromatic Percussion": np.arange(8, 16),
    "Organ": np.arange(16, 24),
    "Guitar": np.arange(24, 32),
    "Bass": np.arange(32, 40),
    "Strings": np.arange(40, 56),  # Strings + Ensemble
    # "Strings": np.arange(40, 48),
    # "Ensemble": np.arange(48, 56),
    "Brass": np.arange(56, 64),
    "Reed": np.arange(64, 72),
    "Pipe": np.arange(72, 80),
    "Synth Lead": np.arange(80, 88),
    "Synth Pad": np.arange(88, 96),
}

# Shallow copy: the per-class program arrays are shared with GM_INSTR_CLASS,
# only the extra "Singing Voice" key is specific to the PLUS variant.
GM_INSTR_CLASS_PLUS = GM_INSTR_CLASS.copy()
GM_INSTR_CLASS_PLUS["Singing Voice"] = [100, 101]

# Lists that are not in ascending order are intentional: the first program of
# a class is the one the class is trained as (see the module docstring).
GM_INSTR_EXT_CLASS = {  # Best for enjoyable MIDI file generation
    "Acoustic Piano": [0, 1, 3, 6, 7],
    "Electric Piano": [2, 4, 5],
    "Chromatic Percussion": np.arange(8, 16),
    "Organ": np.arange(16, 24),
    "Guitar (clean)": np.arange(24, 28),
    "Guitar (distortion)": [30, 28, 29, 31],  # np.arange(28, 32),
    "Bass": [33, 32, 34, 35, 36, 37, 38, 39],  # np.arange(32, 40),
    "Strings": [48, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55],  # np.arange(40, 56),
    "Brass": np.arange(56, 64),
    "Reed": np.arange(64, 72),
    "Pipe": np.arange(72, 80),
    "Synth Lead": np.arange(80, 88),
    "Synth Pad": np.arange(88, 96),
}

# Shallow copy of GM_INSTR_EXT_CLASS with melody and chorus as separate classes.
GM_INSTR_EXT_CLASS_PLUS = GM_INSTR_EXT_CLASS.copy()
GM_INSTR_EXT_CLASS_PLUS["Singing Voice"] = [100]
GM_INSTR_EXT_CLASS_PLUS["Singing Voice (chorus)"] = [101]

# One class per GM program, programs 0-95 only.
GM_INSTR_FULL = {
    "Acoustic Grand Piano": [0],
    "Bright Acoustic Piano": [1],
    "Electric Grand Piano": [2],
    "Honky-tonk Piano": [3],
    "Electric Piano 1": [4],
    "Electric Piano 2": [5],
    "Harpsichord": [6],
    "Clavinet": [7],
    "Celesta": [8],
    "Glockenspiel": [9],
    "Music Box": [10],
    "Vibraphone": [11],
    "Marimba": [12],
    "Xylophone": [13],
    "Tubular Bells": [14],
    "Dulcimer": [15],
    "Drawbar Organ": [16],
    "Percussive Organ": [17],
    "Rock Organ": [18],
    "Church Organ": [19],
    "Reed Organ": [20],
    "Accordion": [21],
    "Harmonica": [22],
    "Tango Accordion": [23],
    "Acoustic Guitar (nylon)": [24],
    "Acoustic Guitar (steel)": [25],
    "Electric Guitar (jazz)": [26],
    "Electric Guitar (clean)": [27],
    "Electric Guitar (muted)": [28],
    "Overdriven Guitar": [29],
    "Distortion Guitar": [30],
    "Guitar Harmonics": [31],
    "Acoustic Bass": [32],
    "Electric Bass (finger)": [33],
    "Electric Bass (pick)": [34],
    "Fretless Bass": [35],
    "Slap Bass 1": [36],
    "Slap Bass 2": [37],
    "Synth Bass 1": [38],
    "Synth Bass 2": [39],
    "Violin": [40],
    "Viola": [41],
    "Cello": [42],
    "Contrabass": [43],
    "Tremolo Strings": [44],
    "Pizzicato Strings": [45],
    "Orchestral Harp": [46],
    "Timpani": [47],
    "String Ensemble 1": [48],
    "String Ensemble 2": [49],
    "Synth Strings 1": [50],
    "Synth Strings 2": [51],
    "Choir Aahs": [52],
    "Voice Oohs": [53],
    "Synth Choir": [54],
    "Orchestra Hit": [55],
    "Trumpet": [56],
    "Trombone": [57],
    "Tuba": [58],
    "Muted Trumpet": [59],
    "French Horn": [60],
    "Brass Section": [61],
    "Synth Brass 1": [62],
    "Synth Brass 2": [63],
    "Soprano Sax": [64],
    "Alto Sax": [65],
    "Tenor Sax": [66],
    "Baritone Sax": [67],
    "Oboe": [68],
    "English Horn": [69],
    "Bassoon": [70],
    "Clarinet": [71],
    "Piccolo": [72],
    "Flute": [73],
    "Recorder": [74],
    "Pan Flute": [75],
    "Bottle Blow": [76],
    "Shakuhachi": [77],
    "Whistle": [78],
    "Ocarina": [79],
    "Lead 1 (square)": [80],
    "Lead 2 (sawtooth)": [81],
    "Lead 3 (calliope)": [82],
    "Lead 4 (chiff)": [83],
    "Lead 5 (charang)": [84],
    "Lead 6 (voice)": [85],
    "Lead 7 (fifths)": [86],
    "Lead 8 (bass + lead)": [87],
    "Pad 1 (new age)": [88],
    "Pad 2 (warm)": [89],
    "Pad 3 (polysynth)": [90],
    "Pad 4 (choir)": [91],
    "Pad 5 (bowed)": [92],
    "Pad 6 (metallic)": [93],
    "Pad 7 (halo)": [94],
    "Pad 8 (sweep)": [95],
    # GM programs 96-127 are intentionally left disabled below.
    # NOTE: 100 and 101 are used for singing voice / chorus in the *_PLUS
    # vocabularies, so enabling "FX 5" / "FX 6" here would reuse those numbers.
    # "FX 1 (rain)": [96],
    # "FX 2 (soundtrack)": [97],
    # "FX 3 (crystal)": [98],
    # "FX 4 (atmosphere)": [99],
    # "FX 5 (brightness)": [100],
    # "FX 6 (goblins)": [101],
    # "FX 7 (echoes)": [102],
    # "FX 8 (sci-fi)": [103],
    # "Sitar": [104],
    # "Banjo": [105],
    # "Shamisen": [106],
    # "Koto": [107],
    # "Kalimba": [108],
    # "Bagpipe": [109],
    # "Fiddle": [110],
    # "Shanai": [111],
    # "Tinkle Bell": [112],
    # "Agogo": [113],
    # "Steel Drums": [114],
    # "Woodblock": [115],
    # "Taiko Drum": [116],
    # "Melodic Tom": [117],
    # "Synth Drum": [118],
    # "Reverse Cymbal": [119],
    # "Guitar Fret Noise": [120],
    # "Breath Noise": [121],
    # "Seashore": [122],
    # "Bird Tweet": [123],
    # "Telephone Ring": [124],
    # "Helicopter": [125],
    # "Applause": [126],
    # "Gunshot": [127]
}

# Together the 34 classes cover every program 0-95 exactly once.
MT3_FULL = {  # this matches the class names in Table 3 of MT3 paper
    "Acoustic Piano": [0, 1, 3, 6, 7],
    "Electric Piano": [2, 4, 5],
    "Chromatic Percussion": np.arange(8, 16),
    "Organ": np.arange(16, 24),
    "Acoustic Guitar": np.arange(24, 26),
    "Clean Electric Guitar": np.arange(26, 29),
    "Distorted Electric Guitar": np.arange(29, 32),
    "Acoustic Bass": [32, 35],
    "Electric Bass": [33, 34, 36, 37, 38, 39],
    "Violin": [40],
    "Viola": [41],
    "Cello": [42],
    "Contrabass": [43],
    "Orchestral Harp": [46],
    "Timpani": [47],
    "String Ensemble": [48, 49, 44, 45],
    "Synth Strings": [50, 51],
    "Choir and Voice": [52, 53, 54],
    "Orchestra Hit": [55],
    "Trumpet": [56, 59],
    "Trombone": [57],
    "Tuba": [58],
    "French Horn": [60],
    "Brass Section": [61, 62, 63],
    "Soprano/Alto Sax": [64, 65],
    "Tenor Sax": [66],
    "Baritone Sax": [67],
    "Oboe": [68],
    "English Horn": [69],
    "Bassoon": [70],
    "Clarinet": [71],
    "Pipe": [73, 72, 74, 75, 76, 77, 78, 79],
    "Synth Lead": np.arange(80, 88),
    "Synth Pad": np.arange(88, 96),
}

# Shallow copy of MT3_FULL with melody and chorus as separate classes.
MT3_FULL_PLUS = MT3_FULL.copy()
MT3_FULL_PLUS["Singing Voice"] = [100]
MT3_FULL_PLUS["Singing Voice (chorus)"] = [101]

# --------------------------------------------------------------------------
# Drum vocabularies (values are MIDI note numbers, not program numbers)
# --------------------------------------------------------------------------
ENST_DRUM_NOTES = {
    "bd": [36],  # Kick Drum
    "sd": [38],  # Snare Drum
    "sweep": [0],  # Brush sweep
    "sticks": [1],  # Sticks
    "rs": [2],  # Rim shot
    "cs": [37],  # X-stick
    "chh": [42],  # Closed Hi-Hat
    "ohh": [46],  # Open Hi-Hat
    "cb": [56],  # Cowbell
    "c": [3],  # Other Cymbals
    "lmt": [47],  # Low Mid Tom
    "mt": [48],  # Mid Tom
    "mtr": [58],  # Mid Tom Rim
    "lt": [45],  # Low Tom
    "ltr": [50],  # Low Tom Rim
    "lft": [41],  # Low Floor Tom
    "rc": [51],  # Ride Cymbal
    "ch": [52],  # Chinese Cymbal
    "cr": [49],  # Crash Cymbal
    "spl": [55],  # Splash Cymbal
}

# Same class names as GM_DRUM_NOTES; only kick, X-stick and snare list fewer notes.
EGMD_DRUM_NOTES = {
    "Kick Drum": [36],  # Listed by order of most common annotation
    "Snare X-stick": [37],  # Snare X-Stick, https://youtu.be/a2KFrrKaoYU?t=80
    "Snare Drum": [38],  # Snare (head) and Electric Snare
    "Closed Hi-Hat": [42, 44, 22],  # 44 is pedal hi-hat
    "Open Hi-Hat": [46, 26],
    "Cowbell": [56],
    "High Floor Tom": [43],
    "Low Floor Tom": [41],  # Lowest Tom
    "Low Tom": [45],
    "Low-Mid Tom": [47],
    "Mid Tom": [48],
    "Low Tom (Rim)": [50],  # TD-17: 47, 50, 58
    "Mid Tom (Rim)": [58],
    # "Ride Cymbal": [51, 53, 59],
    "Ride": [51],
    "Ride (Bell)": [53],  # https://youtu.be/b94hZoM5s3k?t=323
    "Ride (Edge)": [59],
    "Chinese Cymbal": [52],
    "Crash Cymbal": [49, 57],
    "Splash Cymbal": [55],
}

# Inspired by Roland TD-17 MIDI note map, https://rolandus.zendesk.com/hc/en-us/articles/360005173411-TD-17-Default-Factory-MIDI-Note-Map
GM_DRUM_NOTES = {
    "Kick Drum": [36, 35],  # Listed by order of most common annotation
    "Snare X-stick": [37, 2],  # Snare X-Stick, https://youtu.be/a2KFrrKaoYU?t=80
    "Snare Drum": [38, 40],  # Snare (head) and Electric Snare
    "Closed Hi-Hat": [42, 44, 22],  # 44 is pedal hi-hat
    "Open Hi-Hat": [46, 26],
    "Cowbell": [56],
    "High Floor Tom": [43],
    "Low Floor Tom": [41],  # Lowest Tom
    "Low Tom": [45],
    "Low-Mid Tom": [47],
    "Mid Tom": [48],
    "Low Tom (Rim)": [50],  # TD-17: 47, 50, 58
    "Mid Tom (Rim)": [58],
    # "Ride Cymbal": [51, 53, 59],
    "Ride": [51],
    "Ride (Bell)": [53],  # https://youtu.be/b94hZoM5s3k?t=323
    "Ride (Edge)": [59],
    "Chinese Cymbal": [52],
    "Crash Cymbal": [49, 57],
    "Splash Cymbal": [55],
}

# Reduced 3-class drum vocabulary; the commented-out entries are alternative
# groupings that are currently disabled.
KICK_SNARE_HIHAT = {
    "Kick Drum": [36, 35],
    "Snare Drum": [38, 40],
    # "Snare Drum + X-Stick": [38, 40, 37, 2],
    # "Snare X-stick": [37, 2],  # Snare X-Stick, https://youtu.be/a2KFrrKaoYU?t=80
    "Hi-Hat": [42, 44, 46, 22, 26],
    # "Ride Cymbal": [51, 53, 59],
    # "Hi-Hat + Ride": [42, 44, 46, 22, 26, 51, 53, 59],
    # "HiHat + all Cymbals": [42, 44, 46, 22, 26, 51, 53, 59, 52, 49, 57, 55],
    # "Kick Drum + Low Tom": [36, 35, 45],
    # "All Cymbal": [51, 53, 59, 52, 49, 57, 55]
    # "all": np.arange(30, 60)
}

# --------------------------------------------------------------------------
# Preset lookup tables: preset name -> vocabulary
# --------------------------------------------------------------------------
drum_vocab_presets = {
    "gm": GM_DRUM_NOTES,
    "egmd": EGMD_DRUM_NOTES,
    "enst": ENST_DRUM_NOTES,
    "ksh": KICK_SNARE_HIHAT,
    # Same as "ksh", but the ride notes (51, 53, 59) are folded into "Hi-Hat".
    "kshr": {
        "Kick Drum": [36, 35],
        "Snare Drum": [38, 40],
        "Hi-Hat": [42, 44, 46, 22, 26, 51, 53, 59],
    }
}

# Several names are aliases for the very same dict object
# ("mt3_midi" / "gm", "mt3_midi_plus" / "gm_plus").
program_vocab_presets = {
    "gm_full": GM_INSTR_FULL,  # 96 classes (except drums)
    "mt3_full": MT3_FULL,  # 34 classes (except drums) as in MT3 paper
    "mt3_midi": GM_INSTR_CLASS,  # 11 classes (except drums) as in MT3 paper
    "mt3_midi_plus": GM_INSTR_CLASS_PLUS,  # 11 classes + singing (except drums)
    "mt3_full_plus": MT3_FULL_PLUS,  # mt3_full (34 classes) + singing + chorus (except drums)
    "gm": GM_INSTR_CLASS,  # 11 classes (except drums)
    "gm_plus": GM_INSTR_CLASS_PLUS,  # 11 classes + singing (except drums)
    "gm_ext_plus": GM_INSTR_EXT_CLASS_PLUS,  # 13 classes + singing + chorus (except drums)
}