# Copyright 2024 The YourMT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Please see the details in the LICENSE file.
""" install_dataset.py """
import os
import argparse
import mirdata
from typing import Optional, Tuple, Union
from utils.preprocess.generate_dataset_stats import generate_dataset_stats_for_all_datasets, update_dataset_stats_for_new_dataset
from utils.mirdata_dev.datasets import slakh16k
from utils.preprocess.preprocess_slakh import preprocess_slakh16k, add_program_and_is_drum_info_to_file_list
from utils.preprocess.preprocess_musicnet import preprocess_musicnet16k
from utils.preprocess.preprocess_maps import preprocess_maps16k
from utils.preprocess.preprocess_maestro import preprocess_maestro16k
from utils.preprocess.preprocess_guitarset import preprocess_guitarset16k, create_filelist_by_style_guitarset16k
from utils.preprocess.preprocess_enstdrums import preprocess_enstdrums16k, create_filelist_dtm_random_enstdrums16k
from utils.preprocess.preprocess_mir_st500 import preprocess_mir_st500_16k
from utils.preprocess.preprocess_cmedia import preprocess_cmedia_16k
from utils.preprocess.preprocess_rwc_pop_full import preprocess_rwc_pop_full16k
from utils.preprocess.preprocess_rwc_pop import preprocess_rwc_pop16k
from utils.preprocess.preprocess_egmd import preprocess_egmd16k
from utils.preprocess.preprocess_mir1k import preprocess_mir1k_16k
from utils.preprocess.preprocess_urmp import preprocess_urmp16k
from utils.preprocess.preprocess_idmt_smt_bass import preprocess_idmt_smt_bass_16k
from utils.preprocess.preprocess_geerdes import preprocess_geerdes16k
from utils.utils import download_and_extract  #, download_and_extract_zenodo_restricted

# zenodo_token = "eyJhbGciOiJIUzUxMiIsImlhdCI6MTcxMDE1MDYzNywiZXhwIjoxNzEyNzA3MTk5fQ.eyJpZCI6ImRmODA5NzZlLTBjM2QtNDk5NS05YjM0LWFiNGM4NzJhMmZhMSIsImRhdGEiOnt9LCJyYW5kb20iOiIwMzY5ZDcxZjc2NTMyN2UyYmVmN2ExYjJkMmMyYTRhNSJ9.0aHnNC-7ivWQO6l8twjLR0NDH4boC0uOolAAmogVt7XRi2PHU5MEKBQoK7-wgDdnmWEIqEIvoLO6p8KTnsY9dg"

# File (relative to the current working directory) where the Zenodo token is cached.
_TOKEN_CACHE_PATH = '.cached_zenodo_token'


def install_slakh(data_home: os.PathLike, no_down=False) -> None:
    """Download (unless `no_down`) and preprocess the Slakh dataset.

    Args:
        data_home: Data home directory passed to the loader and preprocessors.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        ds = slakh16k.Dataset(data_home, version='2100-yourmt3-16k')
        ds.download(partial_download=['2100-yourmt3-16k', 'index'])
        del (ds)
    preprocess_slakh16k(data_home, delete_source_files=False, fix_bass_octave=True)
    add_program_and_is_drum_info_to_file_list(data_home)


def install_musicnet(data_home: os.PathLike, no_down=False) -> None:
    """Download (unless `no_down`) and preprocess the MusicNet dataset.

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/record/7811639/files/musicnet_yourmt3_16k.tar.gz?download=1"
        checksum = "a2da7c169e26d452a4e8b9bef498b3d7"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_musicnet16k(data_home, dataset_name='musicnet')


def install_maps(data_home: os.PathLike, no_down=False, sanity_check=False) -> None:
    """Download (unless `no_down`) and preprocess the MAPS dataset.

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        no_down: If True, skip the download step and only run preprocessing.
        sanity_check: Forwarded to `preprocess_maps16k`.
    """
    if not no_down:
        url = "https://zenodo.org/record/7812075/files/maps_yourmt3_16k.tar.gz?download=1"
        checksum = "6b070d162c931cd5e69c16ef2398a649"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_maps16k(data_home, dataset_name='maps', ignore_pedal=False, sanity_check=sanity_check)


def install_maestro(data_home: os.PathLike, no_down=False, sanity_check=False) -> None:
    """Download (unless `no_down`) and preprocess the Maestro dataset.

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        no_down: If True, skip the download step and only run preprocessing.
        sanity_check: Forwarded to `preprocess_maestro16k`.
    """
    if not no_down:
        url = "https://zenodo.org/record/7852176/files/maestro_yourmt3_16k.tar.gz?download=1"
        checksum = "c17c6a188d936e5ff3870ef27144d397"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_maestro16k(data_home, dataset_name='maestro', ignore_pedal=False, sanity_check=sanity_check)


def install_guitarset(data_home: os.PathLike, no_down=False) -> None:
    """Download (unless `no_down`), preprocess GuitarSet and create its by-style file lists.

    Args:
        data_home: Data home directory passed to the downloader and preprocessors.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/record/7831843/files/guitarset_yourmt3_16k.tar.gz?download=1"
        checksum = "e3cfe0cc9394d91d9c290ce888821360"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_guitarset16k(data_home, dataset_name='guitarset')
    create_filelist_by_style_guitarset16k(data_home, dataset_name='guitarset')


def install_enstdrums(data_home, no_down=False) -> None:
    """Download (unless `no_down`), preprocess ENST-drums and create its DTM-random file list.

    Args:
        data_home: Data home directory passed to the downloader and preprocessors.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/record/7831843/files/enstdrums_yourmt3_16k.tar.gz?download=1"
        checksum = "7e28c2a923e4f4162b3d83877cedb5eb"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_enstdrums16k(data_home, dataset_name='enstdrums')
    create_filelist_dtm_random_enstdrums16k(data_home, dataset_name='enstdrums')


def install_egmd(data_home, no_down=False) -> None:
    """Download (unless `no_down`) and preprocess the EGMD dataset.

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/record/7831072/files/egmc_yourmt3_16k.tar.gz?download=1"
        checksum = "4f615157ea4c52a64c6c9dcf68bf2bde"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_egmd16k(data_home, dataset_name='egmd')


def install_mirst500(data_home,
                     zenodo_token: Optional[str],
                     no_down=False,
                     sanity_check=True,
                     apply_correction=False) -> None:
    """Download (unless `no_down`) and preprocess MIR-ST500 (restricted access).

    Update Oct 2023: MIR-ST500 with FULL audio files.

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        zenodo_token: Access token forwarded to `download_and_extract`; only
            used when downloading.
        no_down: If True, skip the download step and only run preprocessing.
        sanity_check: Forwarded to `preprocess_mir_st500_16k`.
        apply_correction: Currently unused by this function; kept for
            interface compatibility.
    """
    if not no_down:
        url = "https://zenodo.org/records/10016397/files/mir_st500_yourmt3_16k.tar.gz?download=1"
        checksum = "98eb52eb2456ce4034e21750f309da13"
        download_and_extract(data_home, url, check_sum=checksum, zenodo_token=zenodo_token)
    preprocess_mir_st500_16k(data_home, dataset_name='mir_st500', sanity_check=sanity_check)


def install_cmedia(data_home, zenodo_token: Optional[str], no_down=False, sanity_check=True) -> None:
    """Download (unless `no_down`) and preprocess CMedia (restricted access).

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        zenodo_token: Access token forwarded to `download_and_extract`; only
            used when downloading.
        no_down: If True, skip the download step and only run preprocessing.
        sanity_check: Forwarded to `preprocess_cmedia_16k`.
    """
    if not no_down:
        url = "https://zenodo.org/records/10016397/files/cmedia_yourmt3_16k.tar.gz?download=1"
        checksum = "e6cca23577ba7588e9ed9711a398f7cf"
        download_and_extract(data_home, url, check_sum=checksum, zenodo_token=zenodo_token)
    preprocess_cmedia_16k(data_home, dataset_name='cmedia', sanity_check=sanity_check, apply_correction=True)


def install_rwc_pop(data_home, zenodo_token: Optional[str], no_down=False) -> None:
    """Download (unless `no_down`) and preprocess RWC-Pop, bass and full (restricted access).

    Args:
        data_home: Data home directory passed to the downloader and preprocessors.
        zenodo_token: Access token forwarded to `download_and_extract`; only
            used when downloading.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/records/10016397/files/rwc_pop_yourmt3_16k.tar.gz?download=1"
        checksum = "ad459f9fa1b6b87676b2fb37c0ba5dfc"
        download_and_extract(data_home, url, check_sum=checksum, zenodo_token=zenodo_token)
    preprocess_rwc_pop16k(data_home, dataset_name='rwc_pop')  # bass transcriptions
    preprocess_rwc_pop_full16k(data_home, dataset_name='rwc_pop')  # full transcriptions


def install_mir1k(data_home, no_down=False) -> None:
    """Download (unless `no_down`) the MIR-1K archive; no preprocessing is run.

    The preprocessing call is disabled below and the menu lists this dataset
    as "NOT SUPPORTED".

    Args:
        data_home: Data home directory passed to the downloader.
        no_down: If True, skip the download step (the function then does nothing).
    """
    if not no_down:
        url = "https://zenodo.org/record/7955481/files/mir1k_yourmt3_16k.tar.gz?download=1"
        checksum = "4cbac56a4e971432ca807efd5cb76d67"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    # preprocess_mir1k_16k(data_home, dataset_name='mir1k')


def install_urmp(data_home, no_down=False) -> None:
    """Download (unless `no_down`) and preprocess the URMP dataset.

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/record/8021437/files/urmp_yourmt3_16k.tar.gz?download=1"
        checksum = "4f539c71678a77ba34f6dfca41072102"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_urmp16k(data_home, dataset_name='urmp')


def install_idmt_smt_bass(data_home, no_down=False) -> None:
    """Download (unless `no_down`) and preprocess the IDMT-SMT-Bass dataset.

    Args:
        data_home: Data home directory passed to the downloader and preprocessor.
        no_down: If True, skip the download step and only run preprocessing.
    """
    if not no_down:
        url = "https://zenodo.org/records/10009959/files/idmt_smt_bass_yourmt3_16k.tar.gz?download=1"
        checksum = "0c95f91926a1e95b1f5d075c05b7eb76"
        download_and_extract(data_home, url, remove_tar_file=True, check_sum=checksum)
    preprocess_idmt_smt_bass_16k(data_home, dataset_name='idmt_smt_bass', sanity_check=True,
                                 edit_audio=False)  # the downloaded audio has already been edited


def install_random_nsynth(data_home, no_down=False) -> None:
    """Placeholder for Random-NSynth installation; currently does nothing."""
    return


def install_geerdes(data_home) -> None:
    """Preprocess the Geerdes dataset from local files (there is no download step).

    Any exception raised by preprocessing is printed, followed by a hint to
    contact the dataset provider; it is deliberately not re-raised.

    Args:
        data_home: Data home directory passed to the preprocessor.
    """
    try:
        preprocess_geerdes16k(data_home, dataset_name='geerdes', sanity_check=False)
    except Exception as e:
        print(e)
        print("Geerdes dataset is not available for download. Please contact the dataset provider.")


def regenerate_dataset_stats(data_home) -> None:
    """Regenerate dataset statistics for all datasets under `data_home`."""
    generate_dataset_stats_for_all_datasets(data_home)


def get_cached_zenodo_token() -> str:
    """Return the Zenodo token stored in the cache file.

    Returns:
        The cached token with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If the cache file does not exist. This is a
            subclass of `Exception`, so existing `except Exception` handlers
            keep working.
    """
    # check if cached token exists
    if not os.path.exists(_TOKEN_CACHE_PATH):
        raise FileNotFoundError("Cached Zenodo token not found. Please enter your Zenodo token.")
    # read cached token
    with open(_TOKEN_CACHE_PATH, 'r') as f:
        zenodo_token = f.read().strip()
    print(f"Using cached Zenodo token: {zenodo_token}")
    return zenodo_token


def cache_zenodo_token(zenodo_token: str) -> None:
    """Write `zenodo_token` to the cache file, overwriting any previous value."""
    with open(_TOKEN_CACHE_PATH, 'w') as f:
        f.write(zenodo_token)
    print("Your Zenodo token is cached.")


# Menu text shown by `option_prompt`, one entry per printed line.
_MENU_LINES = (
    "Select the dataset(s) to install (enter comma-separated numbers):",
    "1. Slakh",
    "2. MusicNet",
    "3. MAPS",
    "4. Maestro",
    "5. GuitarSet",
    "6. ENST-drums",
    "7. EGMD",
    "8. MIR-ST500 ** Restricted Access **",
    "9. CMedia ** Restricted Access **",
    "10. RWC-Pop (Bass and Full) ** Restricted Access **",
    "11. MIR-1K (NOT SUPPORTED)",
    "12. URMP",
    "13. IDMT-SMT-Bass",
    "14. Random-NSynth",
    "15. Geerdes",
    "16. Regenerate Dataset Stats (experimental)",
    "17. Request Token for ** Restricted Access **",
    "18. Exit",
)

# Menu numbers for the non-installer actions.
_GEERDES_CHOICE = 15
_REGENERATE_STATS_CHOICE = 16
_REQUEST_TOKEN_CHOICE = 17
_EXIT_CHOICE = 18

# Installers called as `installer(data_home, no_down=...)`.
_OPEN_INSTALLERS = {
    1: install_slakh,
    2: install_musicnet,
    3: install_maps,
    4: install_maestro,
    5: install_guitarset,
    6: install_enstdrums,
    7: install_egmd,
    11: install_mir1k,
    12: install_urmp,
    13: install_idmt_smt_bass,
    14: install_random_nsynth,
}

# Installers called as `installer(data_home, zenodo_token, no_down=...)`.
_RESTRICTED_INSTALLERS = {
    8: install_mirst500,
    9: install_cmedia,
    10: install_rwc_pop,
}


def _parse_choices(choice: str) -> Tuple[set, list]:
    """Split a comma-separated answer into valid menu numbers and rejected entries.

    Args:
        choice: Raw text typed by the user.

    Returns:
        A `(selected, rejected)` pair: `selected` is the set of integers in
        1..18 that were entered, and `rejected` lists the non-empty entries
        that were not numbers or were out of range.
    """
    selected = set()
    rejected = []
    for entry in choice.split(','):
        entry = entry.strip()
        if not entry:
            # Tolerate empty input and stray/trailing commas.
            continue
        try:
            number = int(entry)
        except ValueError:
            rejected.append(entry)
            continue
        if 1 <= number <= _EXIT_CHOICE:
            selected.add(number)
        else:
            rejected.append(entry)
    return selected, rejected


def option_prompt(data_home: os.PathLike, no_download: bool = False) -> None:
    """Show the installer menu, read the user's choices and run them.

    Selected actions run in ascending menu order, each at most once. If
    "18" (Exit) is among the choices, nothing else is run. When a
    restricted-access dataset (8, 9, 10) is selected and downloading is
    enabled, the user is asked for a Zenodo token; an empty answer falls back
    to the cached token, any other answer is cached for later runs.

    Args:
        data_home: Data home directory forwarded to every installer.
        no_download: If True, installers skip downloading and no token is
            requested.

    Raises:
        FileNotFoundError: If the cached token is requested but no cache
            file exists.
    """
    for line in _MENU_LINES:
        print(line)
    choice = input("Enter your choices (multiple choices with comma): ")
    selected, rejected = _parse_choices(choice)

    if _EXIT_CHOICE in selected:
        print("Exiting.")
        return
    if not selected:
        print("Invalid choice(s). Please enter valid numbers separated by commas.")
        return
    if rejected:
        print(f"Ignoring unrecognized choice(s): {', '.join(rejected)}")

    # ask for Zenodo token (once), only if a restricted dataset will be downloaded
    zenodo_token = None
    if not no_download and not selected.isdisjoint(_RESTRICTED_INSTALLERS):
        zenodo_token = input("Enter Zenodo token, or press enter to use the cached token:").strip()
        if not zenodo_token:
            zenodo_token = get_cached_zenodo_token()
        else:
            cache_zenodo_token(zenodo_token)

    for number in sorted(selected):
        if number in _OPEN_INSTALLERS:
            _OPEN_INSTALLERS[number](data_home, no_down=no_download)
        elif number in _RESTRICTED_INSTALLERS:
            _RESTRICTED_INSTALLERS[number](data_home, zenodo_token, no_down=no_download)
        elif number == _GEERDES_CHOICE:
            install_geerdes(data_home)  # not available for download
        elif number == _REGENERATE_STATS_CHOICE:
            # `regenerate_dataset_stats` takes only `data_home`; passing
            # `no_down` here would raise TypeError.
            regenerate_dataset_stats(data_home)
        elif number == _REQUEST_TOKEN_CHOICE:
            print("\nPlease visit https://zenodo.org/records/10016397 to request a Zenodo token.")
            print("Upon submitting your request, you will receive an email with a link labeled 'Access the record'.")
            print("Copy the token that follows 'token=' in that link.")


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Dataset installer script.')
    # data home dir
    parser.add_argument(
        'data_home',
        type=str,
        nargs='?',
        default=None,
        help='Path to data home directory. If None, use the default path defined in src/config/config.py')
    # `no_download` option
    parser.add_argument('--nodown',
                        '-nd',
                        action='store_true',
                        help='Flag to control downloading. If set, no downloading will occur.')
    args = parser.parse_args()

    if args.data_home is None:
        # Imported lazily: the project config is only needed when no path is given.
        from config.config import shared_cfg
        data_home = shared_cfg["PATH"]["data_home"]
    else:
        data_home = args.data_home
    os.makedirs(data_home, exist_ok=True)
    no_download = args.nodown

    option_prompt(data_home, no_download)