# File size: 13,234 Bytes
# a03c9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# Copyright 2024 The YourMT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Please see the details in the LICENSE file.
""" install_dataset.py """
import os
import argparse
import mirdata
from typing import Optional, Tuple, Union
from utils.preprocess.generate_dataset_stats import generate_dataset_stats_for_all_datasets, update_dataset_stats_for_new_dataset
from utils.mirdata_dev.datasets import slakh16k
from utils.preprocess.preprocess_slakh import preprocess_slakh16k, add_program_and_is_drum_info_to_file_list
from utils.preprocess.preprocess_musicnet import preprocess_musicnet16k
from utils.preprocess.preprocess_maps import preprocess_maps16k
from utils.preprocess.preprocess_maestro import preprocess_maestro16k
from utils.preprocess.preprocess_guitarset import preprocess_guitarset16k, create_filelist_by_style_guitarset16k
from utils.preprocess.preprocess_enstdrums import preprocess_enstdrums16k, create_filelist_dtm_random_enstdrums16k
from utils.preprocess.preprocess_mir_st500 import preprocess_mir_st500_16k
from utils.preprocess.preprocess_cmedia import preprocess_cmedia_16k
from utils.preprocess.preprocess_rwc_pop_full import preprocess_rwc_pop_full16k
from utils.preprocess.preprocess_rwc_pop import preprocess_rwc_pop16k
from utils.preprocess.preprocess_egmd import preprocess_egmd16k
from utils.preprocess.preprocess_mir1k import preprocess_mir1k_16k
from utils.preprocess.preprocess_urmp import preprocess_urmp16k
from utils.preprocess.preprocess_idmt_smt_bass import preprocess_idmt_smt_bass_16k
from utils.preprocess.preprocess_geerdes import preprocess_geerdes16k
from utils.utils import download_and_extract  #, download_and_extract_zenodo_restricted

# zenodo_token = "<REDACTED>"  # never commit a real access token; enter it at the prompt instead


def install_slakh(data_home: os.PathLike, no_down: bool = False) -> None:
    """Download (optionally) and preprocess the Slakh dataset.

    Args:
        data_home: Root directory handed to the dataset loader and to the
            preprocessing functions.
        no_down: If true, skip the download step and run only the
            preprocessing.
    """
    if not no_down:
        ds = slakh16k.Dataset(data_home, version='2100-yourmt3-16k')
        ds.download(partial_download=['2100-yourmt3-16k', 'index'])
        # Drop the dataset handle before preprocessing starts.
        del ds
    preprocess_slakh16k(data_home, delete_source_files=False, fix_bass_octave=True)
    add_program_and_is_drum_info_to_file_list(data_home)


def install_musicnet(data_home: os.PathLike, no_down: bool = False) -> None:
    """Download (optionally) and preprocess the MusicNet archive.

    Args:
        data_home: Root directory passed to ``download_and_extract`` and to
            ``preprocess_musicnet16k``.
        no_down: If true, skip the download step and run only the
            preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/record/7811639/files/musicnet_yourmt3_16k.tar.gz?download=1"
        checksum = "a2da7c169e26d452a4e8b9bef498b3d7"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_musicnet16k(data_home, dataset_name='musicnet')


def install_maps(data_home: os.PathLike, no_down: bool = False, sanity_check: bool = False) -> None:
    """Download (optionally) and preprocess the MAPS archive.

    Args:
        data_home: Root directory passed to ``download_and_extract`` and to
            ``preprocess_maps16k``.
        no_down: If true, skip the download step and run only the
            preprocessing.
        sanity_check: Forwarded unchanged to ``preprocess_maps16k``.
    """
    if not no_down:
        url = "https://zenodo.org/record/7812075/files/maps_yourmt3_16k.tar.gz?download=1"
        checksum = "6b070d162c931cd5e69c16ef2398a649"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_maps16k(data_home, dataset_name='maps', ignore_pedal=False, sanity_check=sanity_check)


def install_maestro(data_home: os.PathLike, no_down: bool = False, sanity_check: bool = False) -> None:
    """Download (optionally) and preprocess the Maestro archive.

    Args:
        data_home: Root directory passed to ``download_and_extract`` and to
            ``preprocess_maestro16k``.
        no_down: If true, skip the download step and run only the
            preprocessing.
        sanity_check: Forwarded unchanged to ``preprocess_maestro16k``.
    """
    if not no_down:
        url = "https://zenodo.org/record/7852176/files/maestro_yourmt3_16k.tar.gz?download=1"
        checksum = "c17c6a188d936e5ff3870ef27144d397"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_maestro16k(data_home, dataset_name='maestro', ignore_pedal=False, sanity_check=sanity_check)


def install_guitarset(data_home: os.PathLike, no_down: bool = False) -> None:
    """Download (optionally) and preprocess the GuitarSet archive.

    After preprocessing, the per-style file lists are created with
    ``create_filelist_by_style_guitarset16k``.

    Args:
        data_home: Root directory passed to ``download_and_extract`` and to
            the preprocessing functions.
        no_down: If true, skip the download step and run only the
            preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/record/7831843/files/guitarset_yourmt3_16k.tar.gz?download=1"
        checksum = "e3cfe0cc9394d91d9c290ce888821360"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_guitarset16k(data_home, dataset_name='guitarset')
    create_filelist_by_style_guitarset16k(data_home, dataset_name='guitarset')


def install_enstdrums(data_home, no_down=False) -> None:
    """Fetch (unless ``no_down``) and preprocess the ENST-drums archive.

    The archive is downloaded and extracted into ``data_home`` with the tar
    file removed afterwards; then the preprocessing step and the file-list
    creation step are run in that order.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831843/files/enstdrums_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="7e28c2a923e4f4162b3d83877cedb5eb",
        )
    # Both post-download steps take the same arguments.
    for step in (preprocess_enstdrums16k, create_filelist_dtm_random_enstdrums16k):
        step(data_home, dataset_name='enstdrums')


def install_egmd(data_home, no_down=False) -> None:
    """Fetch (unless ``no_down``) and preprocess the EGMD archive.

    The download is verified against a fixed checksum and the tar file is
    removed after extraction; ``preprocess_egmd16k`` always runs afterwards.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831072/files/egmc_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4f615157ea4c52a64c6c9dcf68bf2bde",
        )
    preprocess_egmd16k(data_home, dataset_name='egmd')


def install_mirst500(data_home, zenodo_token, no_down=False, sanity_check=True, apply_correction=False) -> None:
    """Fetch (unless ``no_down``) and preprocess MIR-ST500.

    Update Oct 2023: MIR-ST500 with FULL audio files.

    ``zenodo_token`` is forwarded to ``download_and_extract``.
    ``sanity_check`` is forwarded to ``preprocess_mir_st500_16k``.
    NOTE(review): ``apply_correction`` is accepted but never used in this
    function -- confirm whether it should be forwarded.
    """
    if not no_down:
        archive_url = "https://zenodo.org/records/10016397/files/mir_st500_yourmt3_16k.tar.gz?download=1"
        expected_checksum = "98eb52eb2456ce4034e21750f309da13"
        download_and_extract(
            data_home,
            archive_url,
            check_sum=expected_checksum,
            zenodo_token=zenodo_token,
        )
    preprocess_mir_st500_16k(
        data_home,
        dataset_name='mir_st500',
        sanity_check=sanity_check,
    )


def install_cmedia(data_home, zenodo_token, no_down=False, sanity_check=True) -> None:
    """Fetch (unless ``no_down``) and preprocess the CMedia archive.

    ``zenodo_token`` is forwarded to ``download_and_extract``. Preprocessing
    always runs with ``apply_correction=True``.
    """
    if not no_down:
        archive_url = "https://zenodo.org/records/10016397/files/cmedia_yourmt3_16k.tar.gz?download=1"
        expected_checksum = "e6cca23577ba7588e9ed9711a398f7cf"
        download_and_extract(
            data_home,
            archive_url,
            check_sum=expected_checksum,
            zenodo_token=zenodo_token,
        )
    preprocess_cmedia_16k(
        data_home,
        dataset_name='cmedia',
        sanity_check=sanity_check,
        apply_correction=True,
    )


def install_rwc_pop(data_home, zenodo_token, no_down=False) -> None:
    """Fetch (unless ``no_down``) and preprocess the RWC-Pop archive.

    ``zenodo_token`` is forwarded to ``download_and_extract``. Two
    preprocessing passes run on the same ``rwc_pop`` dataset: the bass
    transcriptions first, then the full transcriptions.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10016397/files/rwc_pop_yourmt3_16k.tar.gz?download=1",
            check_sum="ad459f9fa1b6b87676b2fb37c0ba5dfc",
            zenodo_token=zenodo_token,
        )
    passes = (
        preprocess_rwc_pop16k,  # bass transcriptions
        preprocess_rwc_pop_full16k,  # full transcriptions
    )
    for run_pass in passes:
        run_pass(data_home, dataset_name='rwc_pop')


def install_mir1k(data_home, no_down=False) -> None:
    """Fetch the MIR-1K archive unless ``no_down`` is set.

    Only the download/extract step is performed; the preprocessing call is
    disabled in this function, so with ``no_down`` set nothing happens.
    """
    if no_down:
        return
    download_and_extract(
        data_home,
        "https://zenodo.org/record/7955481/files/mir1k_yourmt3_16k.tar.gz?download=1",
        remove_tar_file=True,
        check_sum="4cbac56a4e971432ca807efd5cb76d67",
    )
    # preprocess_mir1k_16k(data_home, dataset_name='mir1k')


def install_urmp(data_home, no_down=False) -> None:
    """Fetch (unless ``no_down``) and preprocess the URMP archive.

    The tar file is removed after extraction; ``preprocess_urmp16k`` always
    runs afterwards.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/8021437/files/urmp_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4f539c71678a77ba34f6dfca41072102",
        )
    preprocess_urmp16k(data_home, dataset_name='urmp')


def install_idmt_smt_bass(data_home, no_down=False) -> None:
    """Fetch (unless ``no_down``) and preprocess the IDMT-SMT-Bass archive.

    Preprocessing always runs with the sanity check enabled and audio
    editing disabled.
    """
    if not no_down:
        archive_url = "https://zenodo.org/records/10009959/files/idmt_smt_bass_yourmt3_16k.tar.gz?download=1"
        expected_checksum = "0c95f91926a1e95b1f5d075c05b7eb76"
        download_and_extract(data_home, archive_url, remove_tar_file=True, check_sum=expected_checksum)
    preprocess_idmt_smt_bass_16k(
        data_home,
        dataset_name='idmt_smt_bass',
        sanity_check=True,
        edit_audio=False,  # the downloaded audio has already been edited
    )


def install_random_nsynth(data_home, no_down=False) -> None:
    """Placeholder for the Random-NSynth installer; currently does nothing."""
    return None


def install_geerdes(data_home) -> None:
    """Preprocess the Geerdes dataset; there is no download step.

    Any exception raised by preprocessing is printed, followed by a hint to
    contact the dataset provider. The exception is not re-raised.
    """
    try:
        preprocess_geerdes16k(
            data_home,
            dataset_name='geerdes',
            sanity_check=False,
        )
    except Exception as err:
        # Best-effort: report the failure and carry on.
        print(err)
        print("Geerdes dataset is not available for download. Please contact the dataset provider.")


def regenerate_dataset_stats(data_home, no_down: bool = False) -> None:
    """Regenerate the dataset statistics for all datasets under ``data_home``.

    Args:
        data_home: Root directory passed to
            ``generate_dataset_stats_for_all_datasets``.
        no_down: Accepted and ignored. It exists so this function can be
            called with the same keyword as the ``install_*`` functions;
            the interactive menu passes ``no_down=...`` and would otherwise
            raise ``TypeError``.
    """
    generate_dataset_stats_for_all_datasets(data_home)


def get_cached_zenodo_token() -> str:
    """Return the Zenodo token stored in ``.cached_zenodo_token``.

    The cache file is resolved relative to the current working directory.
    The token value is never printed, so it does not leak into terminal
    scrollback or logs.

    Returns:
        The cached token with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If the cache file does not exist.
        ValueError: If the cache file is empty or contains only whitespace.
    """
    # Open directly instead of checking existence first: one less race.
    try:
        with open('.cached_zenodo_token', 'r') as f:
            zenodo_token = f.read().strip()
    except FileNotFoundError as err:
        raise FileNotFoundError("Cached Zenodo token not found. Please enter your Zenodo token.") from err
    if not zenodo_token:
        raise ValueError("Cached Zenodo token is empty. Please enter your Zenodo token.")
    print("Using cached Zenodo token.")
    return zenodo_token


def cache_zenodo_token(zenodo_token: str) -> None:
    """Store the Zenodo token in ``.cached_zenodo_token``.

    The file is written in the current working directory and overwrites any
    previous content. Because it holds a credential, it is created with
    owner-only read/write permissions.

    Args:
        zenodo_token: Token text to write verbatim.
    """
    cache_path = '.cached_zenodo_token'
    fd = os.open(cache_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as f:
        f.write(zenodo_token)
    # The mode given to os.open applies only when the file is created, so
    # tighten a file left over from an earlier run as well.
    os.chmod(cache_path, 0o600)
    print("Your Zenodo token is cached.")


def option_prompt(data_home: os.PathLike, no_download: bool = False) -> None:
    """Show the dataset menu, read the user's choices and run the installers.

    The user enters one or more comma-separated menu numbers. If "18" is
    among them, nothing is installed. Otherwise the input is validated
    before any installer runs, and the selected actions run in ascending
    menu order.

    Args:
        data_home: Root directory handed to every installer.
        no_download: If true, installers are told to skip downloading and
            no Zenodo token is requested for the restricted datasets.

    Raises:
        Exception: Propagated from ``get_cached_zenodo_token`` when a
            restricted dataset is selected, the token prompt is left empty
            and no usable cached token exists.
    """
    print("Select the dataset(s) to install (enter comma-separated numbers):")
    print("1. Slakh")
    print("2. MusicNet")
    print("3. MAPS")
    print("4. Maestro")
    print("5. GuitarSet")
    print("6. ENST-drums")
    print("7. EGMD")
    print("8. MIR-ST500 ** Restricted Access **")
    print("9. CMedia ** Restricted Access **")
    print("10. RWC-Pop (Bass and Full) ** Restricted Access **")
    print("11. MIR-1K (NOT SUPPORTED)")
    print("12. URMP")
    print("13. IDMT-SMT-Bass")
    print("14. Random-NSynth")
    print("15. Geerdes")
    print("16. Regenerate Dataset Stats (experimental)")
    print("17. Request Token for ** Restricted Access **")
    print("18. Exit")

    choice = input("Enter your choices (multiple choices with comma): ")
    choices = [c.strip() for c in choice.split(',')]

    if "18" in choices:
        print("Exiting.")
        return

    # Validate up front so bad input never raises and never runs after an
    # installer has already started. Valid actions are 1..17.
    selected = set()
    rejected = []
    for c in choices:
        if not c:
            continue  # tolerate stray or trailing commas
        try:
            number = int(c)
        except ValueError:
            rejected.append(c)
            continue
        if 1 <= number <= 17:
            selected.add(number)
        else:
            rejected.append(c)

    if not selected:
        print("Invalid choice(s). Please enter valid numbers separated by commas.")
        return
    if rejected:
        print(f"Ignoring invalid choice(s): {', '.join(rejected)}")

    # Ask for a token once, and only if a restricted dataset will be downloaded.
    zenodo_token = None
    if not no_download and selected & {8, 9, 10}:
        zenodo_token = input("Enter Zenodo token, or press enter to use the cached token:")
        if zenodo_token == "":
            zenodo_token = get_cached_zenodo_token()
        else:
            cache_zenodo_token(zenodo_token)

    installers = {
        1: lambda: install_slakh(data_home, no_down=no_download),
        2: lambda: install_musicnet(data_home, no_down=no_download),
        3: lambda: install_maps(data_home, no_down=no_download),
        4: lambda: install_maestro(data_home, no_down=no_download),
        5: lambda: install_guitarset(data_home, no_down=no_download),
        6: lambda: install_enstdrums(data_home, no_down=no_download),
        7: lambda: install_egmd(data_home, no_down=no_download),
        8: lambda: install_mirst500(data_home, zenodo_token, no_down=no_download),
        9: lambda: install_cmedia(data_home, zenodo_token, no_down=no_download),
        10: lambda: install_rwc_pop(data_home, zenodo_token, no_down=no_download),
        11: lambda: install_mir1k(data_home, no_down=no_download),
        12: lambda: install_urmp(data_home, no_down=no_download),
        13: lambda: install_idmt_smt_bass(data_home, no_down=no_download),
        14: lambda: install_random_nsynth(data_home, no_down=no_download),
        15: lambda: install_geerdes(data_home),  # not available for download
        # Called with data_home only; the download flag does not apply here.
        16: lambda: regenerate_dataset_stats(data_home),
    }
    for number in sorted(selected - {17}):
        installers[number]()

    if 17 in selected:
        print("\nPlease visit https://zenodo.org/records/10016397 to request a Zenodo token.")
        print("Upon submitting your request, you will receive an email with a link labeled 'Access the record'.")
        print("Copy the token that follows 'token=' in that link.")


if __name__ == "__main__":
    # Command-line entry point: parse the arguments, resolve the data home
    # directory, make sure it exists, then open the interactive menu.
    parser = argparse.ArgumentParser(description='Dataset installer script.')
    # Optional positional: data home dir
    parser.add_argument(
        'data_home',
        nargs='?',
        type=str,
        default=None,
        help='Path to data home directory. If None, use the default path defined in src/config/config.py',
    )
    # `no_download` option
    parser.add_argument(
        '--nodown',
        '-nd',
        action='store_true',
        help='Flag to control downloading. If set, no downloading will occur.',
    )
    args = parser.parse_args()

    data_home = args.data_home
    if data_home is None:
        # Imported only when needed: fall back to the configured default path.
        from config.config import shared_cfg
        data_home = shared_cfg["PATH"]["data_home"]
    os.makedirs(data_home, exist_ok=True)

    no_download = args.nodown
    option_prompt(data_home, no_download)