File size: 5,706 Bytes
a03c9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Copyright 2024 The YourMT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Please see the details in the LICENSE file.
"""task.py"""
from config.vocabulary import *
from utils.note_event_dataclasses import Event

task_cfg = {
    "mt3_midi": { # 11 classes + drum class
        "name": "mt3_midi",
        "train_program_vocab": program_vocab_presets["mt3_midi"],
        "train_drum_vocab": drum_vocab_presets["gm"],
    },
    "mt3_midi_plus": { # 11 classes + singing + drum class
        "name": "mt3_midi_plus",
        "train_program_vocab": program_vocab_presets["mt3_midi_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
    },
    "mt3_full": { # 34 classes (except drums) as in MT3 paper
        "name": "mt3_full",
        "train_program_vocab": program_vocab_presets["mt3_full"],
        "train_drum_vocab": drum_vocab_presets["gm"],
    },
    "mt3_full_plus": { # 34 classes (except drums) as in MT3 paper + singing + drum class
        "name": "mt3_full_plus",
        "train_program_vocab": program_vocab_presets["mt3_full_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
    },
    "gm_ext_plus": { # 13 classes + singing + chorus (except drums) 
        "name": "gm_ext_plus",
        "train_program_vocab": program_vocab_presets["gm_ext_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
    },
    "singing_v1": {
        "name": "singing",
        "train_program_vocab": program_vocab_presets["mt3_full_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
        "subtask_tokens": ["task", "transcribe_singing", "transcribe_all"],
        "ignore_decoding_tokens": ["task", "transcribe_singing", "transcribe_all"],
        "max_task_token_length": 2,
        "eval_subtask_prefix": {
            "default": [Event("transcribe_all", 0), Event("task", 0)],
            "singing-only": [Event("transcribe_singing", 0),
                             Event("task", 0)],
        }
    },
    "singing_drum_v1": {
        "name": "singing_drum",
        "train_program_vocab": program_vocab_presets["mt3_full_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
        "subtask_tokens": ["task", "transcribe_singing", "transcribe_drum", "transcribe_all"],
        "ignore_decoding_tokens": [
            "task", "transcribe_singing", "transcribe_drum", "transcribe_all"
        ],
        "max_task_token_length": 2,
        "eval_subtask_prefix": {
            "default": [Event("transcribe_all", 0), Event("task", 0)],
            "singing-only": [Event("transcribe_singing", 0),
                             Event("task", 0)],
            "drum-only": [Event("transcribe_drum", 0),
                          Event("task", 0)],
        }
    },
    "mc13": { # multi-channel decoding task of {11 classes + drums + singing}
        "name": "mc13",
        "train_program_vocab": program_vocab_presets["gm_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
        "num_decoding_channels": len(program_vocab_presets["gm_plus"]) + 1, # 13
        "max_note_token_length_per_ch": 512, # multi-channel decoding exclusive parameter
        "mask_loss_strategy": None, # multi-channel decoding exclusive parameter
    },
    "mc13_256": { # multi-channel decoding task of {11 classes + drums + singing}
        "name": "mc13_256",
        "train_program_vocab": program_vocab_presets["gm_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
        "num_decoding_channels": len(program_vocab_presets["gm_plus"]) + 1, # 13
        "max_note_token_length_per_ch": 256, # multi-channel decoding exclusive parameter
        "mask_loss_strategy": None, # multi-channel decoding exclusive parameter
    },
    "mc13_full_plus": { # multi-channel decoding task of {34 classes + drums + singing & chorus} mapped to 13 channels
        "name": "mc13_full_plus",
        "train_program_vocab": program_vocab_presets["mt3_full_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
        "program2channel_vocab_source": program_vocab_presets["gm_plus"],
        "num_decoding_channels": 13,
        "max_note_token_length_per_ch": 512, # multi-channel decoding exclusive parameter
        "mask_loss_strategy": None, # multi-channel decoding exclusive parameter
    },
    "mc13_full_plus_256": { # multi-channel decoding task of {34 classes + drums + singing & chorus} mapped to 13 channels
        "name": "mc13_full_plus_256",
        "train_program_vocab": program_vocab_presets["mt3_full_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
        "program2channel_vocab_source": program_vocab_presets["gm_plus"],
        "num_decoding_channels": 13,
        "max_note_token_length_per_ch": 256, # multi-channel decoding exclusive parameter
        "mask_loss_strategy": None, # multi-channel decoding exclusive parameter
    },
    "exc_v1": {
        "name": "exclusive",
        "train_program_vocab": program_vocab_presets["mt3_full_plus"],
        "train_drum_vocab": drum_vocab_presets["gm"],
        "subtask_tokens": ["transcribe", "all", ":"],
        # "ignore_decoding_tokens": [
        #     "task", "transcribe_singing", "transcribe_drum", "transcribe_all"
        # ],
        # "max_task_token_length": 2,
        "ignore_decoding_tokens_from_and_to": ["transcribe", ":"],
        "eval_subtask_prefix": { # this is the main task that transcribe all instruments
            "default": [Event("transcribe", 0), Event("all", 0), Event(":", 0)],
        },
        "shuffle_subtasks": True,
    },
}