Spaces:
Starting
Starting
""" data.py: | |
Data presets for training and evaluation. | |
Single Presets: | |
musicnet_mt3 | |
musicnet_em | |
musicnet_thickstun | |
slakh | |
guitarset | |
... | |
Multi Presets: | |
all_mmegs | |
... | |
""" | |
from config.vocabulary import * | |
from config.vocabulary import drum_vocab_presets, program_vocab_presets | |
from utils.utils import deduplicate_splits, merge_splits, merge_vocab | |
data_preset_single_cfg = { | |
"musicnet_mt3": { | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
"dataset_name": "musicnet", | |
"train_split": "train_mt3", | |
"validation_split": "validation_mt3_acoustic", | |
"test_split": "test_mt3_acoustic", | |
"has_stem": False, | |
}, | |
"musicnet_mt3_synth_only": { # sanity-check | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
"dataset_name": "musicnet", | |
"train_split": "train_mt3_synth", | |
"validation_split": "validation_mt3_synth", | |
"test_split": "test_mt3_acoustic", | |
"has_stem": False, | |
}, | |
"musicnet_mt3_em": { | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
"dataset_name": "musicnet", | |
"train_split": "train_mt3_em", | |
"validation_split": "validation_mt3_em", | |
"test_split": "test_mt3_em", | |
"has_stem": False, | |
}, | |
"musicnet_thickstun": { # exp4 | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
"dataset_name": "musicnet", | |
"train_split": "train_thickstun", | |
"validation_split": "test_thickstun", | |
"test_split": "test_thickstun", | |
"has_stem": False, | |
}, | |
"musicnet_thickstun_em": { # NOTE: this is not the use of external 'synth' in the paper, but the use of 'synth' within the dataset | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
"dataset_name": "musicnet", | |
"train_split": "train_thickstun_em", | |
"validation_split": "test_thickstun_em", | |
"test_split": "test_thickstun_em", | |
"has_stem": False, | |
}, | |
"musicnet_thickstun_ext": { # exp4 | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
"dataset_name": "musicnet", | |
"train_split": "train_thickstun", | |
"validation_split": "test_thickstun_ext", | |
"test_split": "test_thickstun_ext", | |
"has_stem": False, | |
}, | |
"musicnet_thickstun_ext_em": { # NOTE: this is not the use of external 'synth' in the paper, but the use of 'synth' within the dataset | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
"dataset_name": "musicnet", | |
"train_split": "train_thickstun_em", | |
"validation_split": "test_thickstun_ext_em", | |
"test_split": "test_thickstun_ext_em", | |
"has_stem": False, | |
}, | |
"maps_default": { | |
"eval_vocab": [PIANO_SOLO_CLASS], | |
"dataset_name": "maps", | |
"train_split": "train", | |
"validation_split": "test", | |
"test_split": "test", | |
"has_stem": False, | |
}, | |
"maps_all": { | |
"eval_vocab": [None], | |
"dataset_name": "maps", | |
"train_split": "all", | |
"validation_split": None, | |
"test_split": None, | |
"has_stem": False, | |
}, | |
"maestro": { | |
"eval_vocab": [PIANO_SOLO_CLASS], | |
"dataset_name": "maestro", | |
"train_split": "train", | |
"validation_split": "validation", | |
"test_split": "test", | |
"has_stem": False, | |
}, | |
"maestro_final": { | |
"eval_vocab": [PIANO_SOLO_CLASS], | |
"dataset_name": "maestro", | |
"train_split": merge_splits(["train", "validation"], dataset_name="maestro"), | |
"validation_split": "test", | |
"test_split": "test", | |
"has_stem": False, | |
}, | |
"guitarset": { # 4 random players for train, 1 for valid, and 1 for test | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": "train", | |
"validation_split": "validation", | |
"test_split": "test", | |
"has_stem": False, | |
}, | |
"guitarset_pshift": { # guitarset + pitch shift | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": "train_pshift", | |
"validation_split": "validation", | |
"test_split": "test", | |
"has_stem": False, | |
}, | |
"guitarset_progression": { # progression 1 and 2 as train, progression 3 as test | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": merge_splits(["progression_1", "progression_2"], dataset_name="guitarset"), | |
"validation_split": "progression_3", | |
"test_split": "progression_3", | |
"has_stem": False, | |
}, | |
"guitarset_progression_pshift": { # guuitarset_progression + pitch shift | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": merge_splits(["progression_1_pshift", "progression_2_pshift"], dataset_name="guitarset"), | |
"validation_split": "progression_3", | |
"test_split": "progression_3", | |
"has_stem": False, | |
}, | |
"guitarset_minus_bn": { # guuitarset_style + pitch shift | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": merge_splits(["Funk_pshift", "SS_pshift", "Jazz_pshift", "Rock_pshift"], | |
dataset_name="guitarset"), | |
"validation_split": "BN", | |
"test_split": "BN", | |
"has_stem": False, | |
}, | |
"guitarset_minus_funk": { # guuitarset_style + pitch shift | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": merge_splits(["BN_pshift", "SS_pshift", "Jazz_pshift", "Rock_pshift"], | |
dataset_name="guitarset"), | |
"validation_split": "Funk", | |
"test_split": "Funk", | |
"has_stem": False, | |
}, | |
"guitarset_minus_ss": { # guuitarset_style + pitch shift | |
"eval_vocab": GUITAR_SOLO_CLASS, | |
"dataset_name": "guitarset", | |
"train_split": merge_splits(["BN_pshift", "Funk_pshift", "Jazz_pshift", "Rock_pshift"], | |
dataset_name="guitarset"), | |
"validation_split": "SS", | |
"test_split": "SS", | |
"has_stem": False, | |
}, | |
"guitarset_minus_jazz": { # guuitarset_style + pitch shift | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": merge_splits(["BN_pshift", "Funk_pshift", "SS_pshift", "Rock_pshift"], | |
dataset_name="guitarset"), | |
"validation_split": "Jazz", | |
"test_split": "Jazz", | |
"has_stem": False, | |
}, | |
"guitarset_minus_rock": { # guuitarset_style + pitch shift | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
"dataset_name": "guitarset", | |
"train_split": merge_splits(["BN_pshift", "Funk_pshift", "SS_pshift", "Jazz_pshift"], | |
dataset_name="guitarset"), | |
"validation_split": "Rock", | |
"test_split": "Rock", | |
"has_stem": False, | |
}, | |
"guitarset_all": { | |
"eval_vocab": [None], | |
"dataset_name": "guitarset", | |
"train_split": "all", | |
"validation_split": None, | |
"test_split": None, | |
"has_stem": False, | |
}, | |
"enstdrums_dtp": { | |
"eval_vocab": [None], | |
"eval_drum_vocab": drum_vocab_presets["ksh"], | |
"dataset_name": "enstdrums", | |
"train_split": merge_splits(["drummer_1_dtp", "drummer_2_dtp", "drummer_1_dtp", "drummer_2_dtp"], dataset_name="enstdrums"), | |
"validation_split": "drummer_1_dtp", # for sanity check | |
"test_split": "drummer_3_dtp", | |
"has_stem": False, | |
}, | |
"enstdrums_dtm": { | |
"eval_vocab": [None], | |
"eval_drum_vocab": drum_vocab_presets["ksh"], | |
"dataset_name": "enstdrums", | |
"train_split": merge_splits(["drummer_1_dtm", "drummer_2_dtm", "drummer_1_dtp", "drummer_2_dtp"], dataset_name="enstdrums"), | |
"validation_split": "drummer_3_dtm_r2", # 0.6 * drum | |
"test_split": "drummer_3_dtm_r1", # 0.75 * drum | |
"has_stem": True, | |
}, | |
"enstdrums_random_dtm": { # single dataset training as a denoising ADT model | |
"eval_vocab": [None], | |
"eval_drum_vocab": drum_vocab_presets["ksh"], | |
"dataset_name": "enstdrums", | |
"train_split": "train_dtm", | |
"validation_split": "validation_dtm", | |
"test_split": "test_dtm", | |
"has_stem": True, | |
}, | |
"enstdrums_random": { # multi dataset training with random split of 70:15:15 | |
"eval_vocab": [None], | |
"eval_drum_vocab": drum_vocab_presets["ksh"], | |
"dataset_name": "enstdrums", | |
"train_split": "train_dtp", | |
"validation_split": "test_dtm", | |
"test_split": "test_dtm", | |
"has_stem": True, | |
}, | |
"enstdrums_random_plus_dtd": { # multi dataset training plus dtd | |
"eval_vocab": [None], | |
"eval_drum_vocab": drum_vocab_presets["ksh"], | |
"dataset_name": "enstdrums", | |
"train_split": merge_splits(["train_dtp", "all_dtd"], dataset_name="enstdrums"), | |
"validation_split": "test_dtm", | |
"test_split": "test_dtm", | |
"has_stem": True, | |
}, | |
"mir_st500": { | |
"eval_vocab": [SINGING_SOLO_CLASS], | |
"dataset_name": "mir_st500", | |
"train_split": "train_stem", | |
"validation_split": "test", | |
"test_split": "test", | |
"has_stem": True, | |
}, | |
"mir_st500_voc": { | |
"eval_vocab": [SINGING_SOLO_CLASS], | |
"dataset_name": "mir_st500", | |
"train_split": "train_vocal", | |
"validation_split": "test_vocal", | |
"test_split": "test_vocal", | |
"has_stem": False, | |
}, | |
"mir_st500_voc_debug": { # using train_vocal for test (for debugging) | |
"eval_vocab": [SINGING_SOLO_CLASS], | |
"dataset_name": "mir_st500", | |
"train_split": "train_vocal", | |
"validation_split": "test_vocal", | |
"test_split": "train_vocal", | |
"has_stem": False, | |
}, | |
"slakh": { | |
"eval_vocab": [GM_INSTR_CLASS], | |
"eval_drum_vocab": drum_vocab_presets["gm"], | |
"dataset_name": "slakh", | |
"train_split": "train", | |
"validation_split": "validation", | |
"test_split": "test", | |
"has_stem": True, | |
}, | |
"slakh_final": { | |
"eval_vocab": [GM_INSTR_CLASS], | |
"eval_drum_vocab": drum_vocab_presets["gm"], | |
"dataset_name": "slakh", | |
"train_split": merge_splits(["train", "validation"], dataset_name="slakh"), | |
"validation_split": "test", | |
"test_split": "test", | |
"has_stem": True, | |
}, | |
"rwc_pop_bass": { | |
"eval_vocab": [BASS_SOLO_CLASS], | |
"add_pitch_class_metric": ["Bass"], | |
"dataset_name": "rwc_pop", | |
"train_split": None, | |
"validation_split": "bass", | |
"test_split": "bass", | |
"has_stem": False, | |
}, | |
"rwc_pop_full": { | |
"eval_vocab": [GM_INSTR_CLASS_PLUS], | |
"add_pitch_class_metric": list(GM_INSTR_CLASS_PLUS.keys()), | |
"dataset_name": "rwc_pop", | |
"train_split": None, | |
"validation_split": "full", | |
"test_split": "full", | |
"has_stem": False, | |
}, | |
"egmd": { | |
"eval_vocab": [None], | |
"eval_drum_vocab": drum_vocab_presets["ksh"], | |
"dataset_name": "egmd", | |
"train_split": "train", | |
"validation_split": "validation", | |
"test_split": "test_reduced", # EGMD has 5000+ test files, so we reudce it to 200 files to save time | |
# "train_limit_num_files": 4402, #8804, # 17608, # limit the number of files for training to random choice of half. | |
"has_stem": False, | |
}, | |
"urmp": { | |
"eval_vocab": [GM_INSTR_CLASS], | |
"dataset_name": "urmp", | |
"train_split": "train", | |
"validation_split": "test", | |
"test_split": "test", | |
"has_stem": True, | |
}, | |
"cmedia": { | |
"eval_vocab": [SINGING_SOLO_CLASS], | |
"dataset_name": "cmedia", | |
"train_split": "train_stem", | |
"validation_split": "train", | |
"test_split": "train", | |
"has_stem": True, | |
}, | |
"cmedia_voc": { | |
"eval_vocab": [SINGING_SOLO_CLASS], | |
"dataset_name": "cmedia", | |
"train_split": "train_vocal", | |
"validation_split": "train_vocal", | |
"test_split": "train_vocal", | |
"has_stem": False, | |
}, | |
"idmt_smt_bass": { | |
"eval_vocab": [BASS_SOLO_CLASS], | |
"dataset_name": "idmt_smt_bass", | |
"train_split": "train", | |
"validation_split": "validation", | |
"test_split": "validation", | |
"has_stem": False, | |
}, | |
"geerdes": { # full mix dataset for evaluation | |
"eval_vocab": [GM_INSTR_CLASS_PLUS], | |
"dataset_name": "geerdes", | |
"train_split": None, | |
"validation_split": None, | |
"test_split": "all", | |
"has_stem": False, | |
}, | |
"geerdes_sep": { # Using vocal/accomp separation for evalutation | |
"eval_vocab": [GM_INSTR_CLASS_PLUS], | |
"dataset_name": "geerdes", | |
"train_split": None, | |
"validation_split": None, | |
"test_split": "all_sep", | |
"has_stem": False, | |
}, | |
"geerdes_half": { # Using half dataset for train/val | |
"eval_vocab": [GM_INSTR_CLASS_PLUS], | |
"dataset_name": "geerdes", | |
"train_split": "train", | |
"validation_split": "validation", | |
"test_split": "validation", | |
"has_stem": False, | |
}, | |
"geerdes_half_sep": { # Using half dataset with vocal/accomp separation for train/val | |
"eval_vocab": [GM_INSTR_CLASS_PLUS], | |
"dataset_name": "geerdes", | |
"train_split": "train_sep", | |
"validation_split": "validation_sep", | |
"test_split": "validation_sep", | |
"has_stem": False, | |
}, | |
} | |
data_preset_multi_cfg = { | |
"musicnet_mt3_em_synth_plus_maps": { | |
"presets": ["musicnet_mt3_em_synth", "maps_all"], | |
"weights": [0.6, 0.4], | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
}, | |
"musicnet_em_synth_table2_plus_maps": { | |
"presets": ["musicnet_em_synth_table2", "maps_all"], | |
"weights": [0.6, 0.4], | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
}, | |
"musicnet_em_synth_table2_plus_maps_multi": { | |
"presets": ["musicnet_em_synth_table2", "maps_default"], | |
"weights": [0.6, 0.4], | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
}, | |
"guitarset_progression_plus_maps": { | |
"presets": ["guitarset_progression", "maps_all"], | |
"weights": [0.5, 0.5], | |
"eval_vocab": [GUITAR_SOLO_CLASS], | |
}, | |
"guitarset_pshift_plus_maps": { | |
"presets": ["guitarset_pshift", "maps_default"], | |
"weights": [0.6, 0.4], | |
"eval_vocab": [merge_vocab([GUITAR_SOLO_CLASS, PIANO_SOLO_CLASS])], | |
}, | |
"guitarset_pshift_plus_musicnet_thick": { | |
"presets": ["guitarset_pshift", "musicnet_thickstun_em"], | |
"weights": [0.5, 0.5], | |
"eval_vocab": [merge_vocab([GUITAR_SOLO_CLASS, PIANO_SOLO_CLASS])], | |
}, | |
"multi_sanity_check": { | |
"presets": ["musicnet_mt3_synth_only", "musicnet_mt3_synth_only"], | |
"weights": [0.6, 0.4], | |
"eval_vocab": [MUSICNET_INSTR_CLASS], | |
}, | |
"all_mmegs": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_pshift" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.2, 0.2], | |
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_gt_cv0": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_minus_bn" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.2, 0.2], | |
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_gt_cv1": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_minus_funk" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.2, 0.2], | |
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_gt_cv2": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_minus_ss" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.2, 0.2], | |
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_gt_cv3": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_minus_rock" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.2, 0.2], | |
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_gt_cv4": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_minus_jazz" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.2, 0.2], | |
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_enstdrums_random": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_random", "guitarset" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.2, 0.2], | |
"eval_vocab": [None] * 5, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_plus_egmd": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_random_plus_dtd", | |
"guitarset", "egmd" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.1, 0.1, 0.2], | |
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_dtp_egmd": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset", "egmd" | |
], | |
"weights": [0.2, 0.2, 0.2, 0.1, 0.1, 0.2], | |
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_weighted_slakh": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", "guitarset_pshift", "egmd" | |
], | |
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.2], | |
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_weighted_mt3": { # for comparison with MT3 | |
"presets": [ | |
"slakh", "musicnet_mt3", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression_pshift", "egmd" | |
], | |
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.2], | |
"eval_vocab": [None] * 6, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_weighted_mt3_em": { # musicnet_mt3_em | |
"presets": [ | |
"slakh", "musicnet_mt3_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression_pshift", "egmd" | |
], | |
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.2], | |
"eval_vocab": [None] * 6, # None means instrument-agnoßstic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_urmp": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp" | |
], | |
"weights": [0.5, 0.2, 0.1, 0.05, 0.05, 0.05, 0.1], | |
"eval_vocab": [None] * 7, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_urmp_mt3": { # for comparison with MT3 including URMP | |
"presets": [ | |
"slakh", "musicnet_mt3", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression", "egmd", "urmp" | |
], | |
"weights": [0.5, 0.2, 0.1, 0.05, 0.05, 0.0125, 0.1], | |
"eval_vocab": [None] * 7, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_urmp_mt3_em": { # musicnet_mt3_em including URMP | |
"presets": [ | |
"slakh", "musicnet_mt3_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression", "egmd", "urmp" | |
], | |
"weights": [0.5, 0.2, 0.1, 0.05, 0.05, 0.0125, 0.1], | |
"eval_vocab": [None] * 7, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_maestro": { # including Mestro and URMP | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.5, 0.1, 0.125, 0.075, 0.025, 0.01, 0.1, 0.1], | |
"eval_vocab": [None] * 8, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_maestro_mt3": { # for comparison with MT3 including URMP | |
"presets": [ | |
"slakh", "musicnet_mt3", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1], | |
"eval_vocab": [None] * 8, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_maestro_mt3_em": { # musicnet_mt3_em including URMP | |
"presets": [ | |
"slakh", "musicnet_mt3_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1], | |
"eval_vocab": [None] * 8, # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"singing_v1": { # slakh + mir_st500 without spleeter | |
"presets": ["slakh", "mir_st500"], | |
"weights": [0.8, 0.2], | |
"eval_vocab": [None, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_singing_v1": { # for singing-only task | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_singing_drum_v1": { # for singing-only and drum-only tasks | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtm", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.5, 0.1, 0.1, 0.05, 0.05, 0.0125, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross": { # including Mestro and URMP | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.5, 0.1, 0.125, 0.075, 0.025, 0.01, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_rebal": { # rebalanced for cross-augment, using spleeter | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.4, 0.15, 0.15, 0.075, 0.025, 0.01, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_rebal2": { # rebalanced for cross-augment, using spleeter | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.275, 0.19, 0.19, 0.1, 0.025, 0.02, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_rebal4": { # rebalanced for cross-augment, using spleeter | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.258, 0.19, 0.2, 0.125, 0.022, 0.005, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_rebal5": { # rebalanced for cross-augment, using spleeter | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.295, 0.19, 0.24, 0.05, 0.02, 0.005, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_stem": { # accomp stem for sub-task learning + rebalanced for cross-augment | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtm", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.4, 0.15, 0.15, 0.075, 0.025, 0.01, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_stem_rebal3": { # accomp stem for sub-task learning + rebalanced for cross-augment | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_stem", "enstdrums_dtm", | |
"guitarset_pshift", "egmd", "urmp", "maestro" | |
], | |
"weights": [0.265, 0.18, 0.21, 0.1, 0.025, 0.02, 0.1, 0.1], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_v6": { # +cmeida +idmt_smt_bass | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc", | |
], | |
"weights": [0.295, 0.19, 0.19, 0.05, 0.01, 0.005, 0.1, 0.1, 0.01, 0.05], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_v6_geerdes": { # +geerdes_half | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc", | |
"geerdes_half", "geerdes_half_sep" | |
], | |
"weights": [0.295, 0.19, 0.19, 0.05, 0.01, 0.005, 0.075, 0.075, 0.01, 0.05, 0.025, 0.025], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, | |
SINGING_SOLO_CLASS, GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_v6_geerdes_rebal": { # +geerdes_half | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc", | |
"geerdes_half", "geerdes_half_sep" | |
], | |
"weights": [0.245, 0.175, 0.19, 0.05, 0.01, 0.005, 0.075, 0.05, 0.01, 0.05, 0.075, 0.075], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, | |
SINGING_SOLO_CLASS, GM_INSTR_EXT_CLASS_PLUS, GM_INSTR_EXT_CLASS_PLUS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_v7": { | |
"presets": [ | |
"slakh", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression_pshift", "egmd", "urmp", "maestro", "idmt_smt_bass", "cmedia_voc", | |
], | |
"weights": [0.295, 0.19, 0.191, 0.05, 0.01, 0.004, 0.1, 0.1, 0.01, 0.05], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_cross_final": { | |
"presets": [ | |
"slakh_final", "musicnet_thickstun_em", "mir_st500_voc", "enstdrums_dtp", | |
"guitarset_progression_pshift", "egmd", "urmp", "maestro_final", "idmt_smt_bass", "cmedia_voc", | |
], | |
"weights": [0.295, 0.19, 0.191, 0.05, 0.01, 0.004, 0.1, 0.1, 0.01, 0.05], | |
"eval_vocab": [None, None, SINGING_SOLO_CLASS, None, None, None, None, None, BASS_SOLO_CLASS, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"all_eval_final": { # The final evaluation set | |
"presets": [ | |
"slakh", "musicnet_thickstun", "musicnet_thickstun_em", "musicnet_thickstun_ext", | |
"musicnet_thickstun_ext_em", "mir_st500_voc", "mir_st500", "enstdrums_dtp", | |
"enstdrums_dtm", "guitarset_progression_pshift", "rwc_pop_bass", "maestro", "urmp", | |
"maps_default", "rwc_pop_full", # "geerdes", "geerdes_sep", | |
], | |
"eval_vocab": [ | |
GM_INSTR_CLASS, MUSICNET_INSTR_CLASS, MUSICNET_INSTR_CLASS, MUSICNET_INSTR_CLASS, | |
MUSICNET_INSTR_CLASS, SINGING_SOLO_CLASS, SINGING_SOLO_CLASS, None, | |
None, None, BASS_SOLO_CLASS, PIANO_SOLO_CLASS, GM_INSTR_CLASS, | |
PIANO_SOLO_CLASS, GM_INSTR_CLASS_PLUS, # GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS | |
], | |
"eval_drum_vocab": drum_vocab_presets["ksh"], | |
}, | |
"geerdes_eval": { # Geerdes evaluation sets for models trained without Geerdes. | |
"presets": ["geerdes_sep", "geerdes"], | |
"eval_vocab": [GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS], | |
"eval_drum_vocab": drum_vocab_presets["gm"], | |
}, | |
"geerdes_half_eval": { # Geerdes evaluation sets for models trained with Geerdes-half | |
"presets": ["geerdes_half_sep", "geerdes_half"], | |
"eval_vocab": [GM_INSTR_CLASS_PLUS, GM_INSTR_CLASS_PLUS], | |
"eval_drum_vocab": drum_vocab_presets["gm"], | |
}, | |
"minimal": { # slakh + mir_st500 with spleeter | |
"presets": ["slakh", "mir_st500_voc"], | |
"weights": [0.8, 0.2], | |
"eval_vocab": [None, SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
"singing_debug": { # slakh + mir_st500 with spleeter | |
"presets": ["mir_st500_voc_debug"], | |
"weights": [1.0], | |
"eval_vocab": [SINGING_SOLO_CLASS], # None means instrument-agnostic F1 for each dataset | |
"eval_drum_vocab": drum_vocab_presets["ksh"], # for drums, kick-snare-hihat metric | |
"val_max_num_files": 20, # max 20 files per dataset | |
"test_max_num_files": None, | |
}, | |
} | |