# Page header: YourMT3 / amt / src / install_dataset.py - mimbres - a03c9b4
# Copyright 2024 The YourMT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Please see the details in the LICENSE file.
""" install_dataset.py """
import os
import argparse
import mirdata
from typing import Optional, Tuple, Union
from utils.preprocess.generate_dataset_stats import generate_dataset_stats_for_all_datasets, update_dataset_stats_for_new_dataset
from utils.mirdata_dev.datasets import slakh16k
from utils.preprocess.preprocess_slakh import preprocess_slakh16k, add_program_and_is_drum_info_to_file_list
from utils.preprocess.preprocess_musicnet import preprocess_musicnet16k
from utils.preprocess.preprocess_maps import preprocess_maps16k
from utils.preprocess.preprocess_maestro import preprocess_maestro16k
from utils.preprocess.preprocess_guitarset import preprocess_guitarset16k, create_filelist_by_style_guitarset16k
from utils.preprocess.preprocess_enstdrums import preprocess_enstdrums16k, create_filelist_dtm_random_enstdrums16k
from utils.preprocess.preprocess_mir_st500 import preprocess_mir_st500_16k
from utils.preprocess.preprocess_cmedia import preprocess_cmedia_16k
from utils.preprocess.preprocess_rwc_pop_full import preprocess_rwc_pop_full16k
from utils.preprocess.preprocess_rwc_pop import preprocess_rwc_pop16k
from utils.preprocess.preprocess_egmd import preprocess_egmd16k
from utils.preprocess.preprocess_mir1k import preprocess_mir1k_16k
from utils.preprocess.preprocess_urmp import preprocess_urmp16k
from utils.preprocess.preprocess_idmt_smt_bass import preprocess_idmt_smt_bass_16k
from utils.preprocess.preprocess_geerdes import preprocess_geerdes16k
from utils.utils import download_and_extract #, download_and_extract_zenodo_restricted
# zenodo_token = "eyJhbGciOiJIUzUxMiIsImlhdCI6MTcxMDE1MDYzNywiZXhwIjoxNzEyNzA3MTk5fQ.eyJpZCI6ImRmODA5NzZlLTBjM2QtNDk5NS05YjM0LWFiNGM4NzJhMmZhMSIsImRhdGEiOnt9LCJyYW5kb20iOiIwMzY5ZDcxZjc2NTMyN2UyYmVmN2ExYjJkMmMyYTRhNSJ9.0aHnNC-7ivWQO6l8twjLR0NDH4boC0uOolAAmogVt7XRi2PHU5MEKBQoK7-wgDdnmWEIqEIvoLO6p8KTnsY9dg"
def install_slakh(data_home=os.PathLike, no_down=False) -> None:
    """Install Slakh (version '2100-yourmt3-16k') under ``data_home``.

    Args:
        data_home: Data home directory handed to the downloader and to the
            preprocessing helpers.
            NOTE(review): the default is the ``os.PathLike`` class itself,
            which looks like an annotation written as a default value;
            callers should always pass a real path.
        no_down: When true, skip the download and only run preprocessing.
    """
    slakh_version = '2100-yourmt3-16k'
    if not no_down:
        dataset = slakh16k.Dataset(data_home, version=slakh_version)
        dataset.download(partial_download=[slakh_version, 'index'])
        # Drop the dataset handle before preprocessing starts.
        del dataset
    preprocess_slakh16k(data_home, delete_source_files=False, fix_bass_octave=True)
    add_program_and_is_drum_info_to_file_list(data_home)
def install_musicnet(data_home=os.PathLike, no_down=False) -> None:
    """Install MusicNet under ``data_home``.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
            NOTE(review): the default is the ``os.PathLike`` class itself,
            which looks like a misplaced annotation; always pass a real path.
        no_down: When true, skip the download and only run preprocessing.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7811639/files/musicnet_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="a2da7c169e26d452a4e8b9bef498b3d7",
        )
    preprocess_musicnet16k(data_home, dataset_name='musicnet')
def install_maps(data_home=os.PathLike, no_down=False, sanity_check=False) -> None:
    """Install MAPS under ``data_home``.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
            NOTE(review): the default is the ``os.PathLike`` class itself,
            which looks like a misplaced annotation; always pass a real path.
        no_down: When true, skip the download and only run preprocessing.
        sanity_check: Forwarded unchanged to ``preprocess_maps16k``.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7812075/files/maps_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="6b070d162c931cd5e69c16ef2398a649",
        )
    preprocess_maps16k(
        data_home,
        dataset_name='maps',
        ignore_pedal=False,
        sanity_check=sanity_check,
    )
def install_maestro(data_home=os.PathLike, no_down=False, sanity_check=False) -> None:
    """Install Maestro under ``data_home``.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
            NOTE(review): the default is the ``os.PathLike`` class itself,
            which looks like a misplaced annotation; always pass a real path.
        no_down: When true, skip the download and only run preprocessing.
        sanity_check: Forwarded unchanged to ``preprocess_maestro16k``.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7852176/files/maestro_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="c17c6a188d936e5ff3870ef27144d397",
        )
    preprocess_maestro16k(
        data_home,
        dataset_name='maestro',
        ignore_pedal=False,
        sanity_check=sanity_check,
    )
def install_guitarset(data_home=os.PathLike, no_down=False) -> None:
    """Install GuitarSet under ``data_home`` and build its by-style file lists.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
            NOTE(review): the default is the ``os.PathLike`` class itself,
            which looks like a misplaced annotation; always pass a real path.
        no_down: When true, skip the download and only run preprocessing.
    """
    name = 'guitarset'
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831843/files/guitarset_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="e3cfe0cc9394d91d9c290ce888821360",
        )
    # Preprocess first, then derive the per-style file lists.
    preprocess_guitarset16k(data_home, dataset_name=name)
    create_filelist_by_style_guitarset16k(data_home, dataset_name=name)
def install_enstdrums(data_home, no_down=False) -> None:
    """Install ENST-drums under ``data_home`` and build its DTM-random file list.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
        no_down: When true, skip the download and only run preprocessing.
    """
    name = 'enstdrums'
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831843/files/enstdrums_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="7e28c2a923e4f4162b3d83877cedb5eb",
        )
    # Preprocess first, then create the file list from the result.
    preprocess_enstdrums16k(data_home, dataset_name=name)
    create_filelist_dtm_random_enstdrums16k(data_home, dataset_name=name)
def install_egmd(data_home, no_down=False) -> None:
    """Install EGMD under ``data_home``.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
        no_down: When true, skip the download and only run preprocessing.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7831072/files/egmc_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4f615157ea4c52a64c6c9dcf68bf2bde",
        )
    preprocess_egmd16k(data_home, dataset_name='egmd')
def install_mirst500(data_home, zenodo_token, no_down=False, sanity_check=True, apply_correction=False) -> None:
    """Install MIR-ST500 (restricted access) under ``data_home``.

    Update Oct 2023: MIR-ST500 with FULL audio files.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
        zenodo_token: Access token forwarded to ``download_and_extract``.
        no_down: When true, skip the download and only run preprocessing.
        sanity_check: Forwarded unchanged to ``preprocess_mir_st500_16k``.
        apply_correction: Accepted but not used anywhere in this function.
            NOTE(review): confirm whether it should be forwarded to the
            preprocessing step.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10016397/files/mir_st500_yourmt3_16k.tar.gz?download=1",
            check_sum="98eb52eb2456ce4034e21750f309da13",
            zenodo_token=zenodo_token,
        )
    preprocess_mir_st500_16k(data_home, dataset_name='mir_st500', sanity_check=sanity_check)
def install_cmedia(data_home, zenodo_token, no_down=False, sanity_check=True) -> None:
    """Install CMedia (restricted access) under ``data_home``.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
        zenodo_token: Access token forwarded to ``download_and_extract``.
        no_down: When true, skip the download and only run preprocessing.
        sanity_check: Forwarded unchanged to ``preprocess_cmedia_16k``.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10016397/files/cmedia_yourmt3_16k.tar.gz?download=1",
            check_sum="e6cca23577ba7588e9ed9711a398f7cf",
            zenodo_token=zenodo_token,
        )
    # The correction step is always enabled for this dataset.
    preprocess_cmedia_16k(
        data_home,
        dataset_name='cmedia',
        sanity_check=sanity_check,
        apply_correction=True,
    )
def install_rwc_pop(data_home, zenodo_token, no_down=False) -> None:
    """Install RWC-Pop (restricted access) under ``data_home``.

    Runs both preprocessing passes on the same ``rwc_pop`` dataset: the bass
    transcriptions first, then the full transcriptions.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
        zenodo_token: Access token forwarded to ``download_and_extract``.
        no_down: When true, skip the download and only run preprocessing.
    """
    name = 'rwc_pop'
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10016397/files/rwc_pop_yourmt3_16k.tar.gz?download=1",
            check_sum="ad459f9fa1b6b87676b2fb37c0ba5dfc",
            zenodo_token=zenodo_token,
        )
    # Bass transcriptions.
    preprocess_rwc_pop16k(data_home, dataset_name=name)
    # Full transcriptions.
    preprocess_rwc_pop_full16k(data_home, dataset_name=name)
def install_mir1k(data_home, no_down=False) -> None:
    """Download and extract MIR-1K into ``data_home``; no preprocessing is run.

    Args:
        data_home: Data home directory passed to ``download_and_extract``.
        no_down: When true, skip the download, which makes this call a no-op.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/7955481/files/mir1k_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4cbac56a4e971432ca807efd5cb76d67",
        )
    # Preprocessing is currently disabled for this dataset:
    # preprocess_mir1k_16k(data_home, dataset_name='mir1k')
def install_urmp(data_home, no_down=False) -> None:
    """Install URMP under ``data_home``.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
        no_down: When true, skip the download and only run preprocessing.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/record/8021437/files/urmp_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="4f539c71678a77ba34f6dfca41072102",
        )
    preprocess_urmp16k(data_home, dataset_name='urmp')
def install_idmt_smt_bass(data_home, no_down=False) -> None:
    """Install IDMT-SMT-Bass under ``data_home``.

    Args:
        data_home: Data home directory passed to the download and
            preprocessing helpers.
        no_down: When true, skip the download and only run preprocessing.
    """
    if not no_down:
        download_and_extract(
            data_home,
            "https://zenodo.org/records/10009959/files/idmt_smt_bass_yourmt3_16k.tar.gz?download=1",
            remove_tar_file=True,
            check_sum="0c95f91926a1e95b1f5d075c05b7eb76",
        )
    # edit_audio stays off: the downloaded audio has already been edited.
    preprocess_idmt_smt_bass_16k(
        data_home,
        dataset_name='idmt_smt_bass',
        sanity_check=True,
        edit_audio=False,
    )
def install_random_nsynth(data_home, no_down=False) -> None:
    """Placeholder for Random-NSynth: accepts the usual arguments and does nothing.

    Args:
        data_home: Unused.
        no_down: Unused.
    """
    return None
def install_geerdes(data_home) -> None:
    """Preprocess the Geerdes dataset found under ``data_home``, best effort.

    There is no download step. Any exception raised while preprocessing is
    printed, followed by a hint to contact the dataset provider, and is not
    re-raised.

    Args:
        data_home: Data home directory passed to ``preprocess_geerdes16k``.
    """
    try:
        preprocess_geerdes16k(data_home, dataset_name='geerdes', sanity_check=False)
    except Exception as err:
        unavailable_hint = "Geerdes dataset is not available for download. Please contact the dataset provider."
        for message in (err, unavailable_hint):
            print(message)
def regenerate_dataset_stats(data_home, no_down=False) -> None:
    """Regenerate the dataset statistics for all datasets under ``data_home``.

    Args:
        data_home: Data home directory passed to
            ``generate_dataset_stats_for_all_datasets``.
        no_down: Accepted for signature parity with the ``install_*``
            functions, so a caller can pass the same ``no_down=...`` keyword
            to every menu action without a ``TypeError``. It is ignored here.
    """
    del no_down  # intentionally unused
    generate_dataset_stats_for_all_datasets(data_home)
def get_cached_zenodo_token() -> str:
    """Return the Zenodo token stored in ``.cached_zenodo_token``.

    The cache file is looked up relative to the current working directory.
    The token is a credential, so only a masked form is printed instead of
    the full value.

    Returns:
        The cached token with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If the cache file does not exist. This is a
            subclass of ``Exception``, so existing ``except Exception``
            handlers keep working.
    """
    try:
        with open('.cached_zenodo_token', 'r') as f:
            zenodo_token = f.read().strip()
    except FileNotFoundError as err:
        raise FileNotFoundError("Cached Zenodo token not found. Please enter your Zenodo token.") from err
    # Show a short prefix only when the token is long enough that the prefix
    # does not reveal a meaningful part of it.
    visible_prefix = zenodo_token[:4] if len(zenodo_token) > 8 else ""
    print(f"Using cached Zenodo token: {visible_prefix}{'*' * 8}")
    return zenodo_token
def cache_zenodo_token(zenodo_token: str) -> None:
    """Write ``zenodo_token`` to ``.cached_zenodo_token`` and confirm on stdout.

    The file is created in (or overwritten in) the current working directory
    and holds the token exactly as given.

    Args:
        zenodo_token: Token text to store.
    """
    cache_file = open('.cached_zenodo_token', 'w')
    try:
        cache_file.write(zenodo_token)
    finally:
        cache_file.close()
    print("Your Zenodo token is cached.")
def option_prompt(data_home: os.PathLike, no_download: bool = False) -> None:
    """Show the dataset menu, read the user's selection, and run the chosen actions.

    The selection is a comma-separated list of menu numbers. Entries are
    compared as strings, so non-numeric or empty entries never raise; they
    are reported and ignored. Choosing 18 exits without running anything.
    Actions run in menu order, whatever order the user typed them in.

    Args:
        data_home: Data home directory passed to every selected action.
        no_download: When true, the installers are asked to skip downloading
            and no Zenodo token is requested.
    """
    menu_lines = (
        "Select the dataset(s) to install (enter comma-separated numbers):",
        "1. Slakh",
        "2. MusicNet",
        "3. MAPS",
        "4. Maestro",
        "5. GuitarSet",
        "6. ENST-drums",
        "7. EGMD",
        "8. MIR-ST500 ** Restricted Access **",
        "9. CMedia ** Restricted Access **",
        "10. RWC-Pop (Bass and Full) ** Restricted Access **",
        "11. MIR-1K (NOT SUPPORTED)",
        "12. URMP",
        "13. IDMT-SMT-Bass",
        "14. Random-NSynth",
        "15. Geerdes",
        "16. Regenerate Dataset Stats (experimental)",
        "17. Request Token for ** Restricted Access **",
        "18. Exit",
    )
    for line in menu_lines:
        print(line)

    choice = input("Enter your choices (multiple choices with comma): ")
    # Drop empty entries so stray commas ("1,,2" or "1,") are harmless.
    choices = [c.strip() for c in choice.split(',') if c.strip()]

    if "18" in choices:
        print("Exiting.")
        return

    # 1..17 are the actionable entries (18 was handled above). Comparing
    # strings avoids int() failing on input such as "abc".
    valid_choices = {str(number) for number in range(1, 18)}
    if not any(c in valid_choices for c in choices):
        print("Invalid choice(s). Please enter valid numbers separated by commas.")
        return
    unknown = [c for c in choices if c not in valid_choices]
    if unknown:
        print(f"Ignoring unrecognized choice(s): {', '.join(unknown)}")

    # The restricted-access datasets (8, 9, 10) need a Zenodo token, but only
    # when something will actually be downloaded.
    zenodo_token = None
    needs_token = any(c in ("8", "9", "10") for c in choices)
    if needs_token and not no_download:
        zenodo_token = input("Enter Zenodo token, or press enter to use the cached token:")
        if zenodo_token == "":
            zenodo_token = get_cached_zenodo_token()
        else:
            cache_zenodo_token(zenodo_token)

    # Menu number -> action, listed in menu order. The lambdas defer name
    # lookup and execution until the entry is actually selected.
    actions = (
        ("1", lambda: install_slakh(data_home, no_down=no_download)),
        ("2", lambda: install_musicnet(data_home, no_down=no_download)),
        ("3", lambda: install_maps(data_home, no_down=no_download)),
        ("4", lambda: install_maestro(data_home, no_down=no_download)),
        ("5", lambda: install_guitarset(data_home, no_down=no_download)),
        ("6", lambda: install_enstdrums(data_home, no_down=no_download)),
        ("7", lambda: install_egmd(data_home, no_down=no_download)),
        ("8", lambda: install_mirst500(data_home, zenodo_token, no_down=no_download)),
        ("9", lambda: install_cmedia(data_home, zenodo_token, no_down=no_download)),
        ("10", lambda: install_rwc_pop(data_home, zenodo_token, no_down=no_download)),
        ("11", lambda: install_mir1k(data_home, no_down=no_download)),
        ("12", lambda: install_urmp(data_home, no_down=no_download)),
        ("13", lambda: install_idmt_smt_bass(data_home, no_down=no_download)),
        ("14", lambda: install_random_nsynth(data_home, no_down=no_download)),
        ("15", lambda: install_geerdes(data_home)),  # not available for download
        # Called with data_home only, so it works whether or not the stats
        # function accepts a no_down keyword.
        ("16", lambda: regenerate_dataset_stats(data_home)),
    )
    for number, run_action in actions:
        if number in choices:
            run_action()

    if "17" in choices:
        print("\nPlease visit https://zenodo.org/records/10016397 to request a Zenodo token.")
        print("Upon submitting your request, you will receive an email with a link labeled 'Access the record'.")
        print("Copy the token that follows 'token=' in that link.")
if __name__ == "__main__":
    # Command line: an optional positional data home plus a no-download flag.
    cli = argparse.ArgumentParser(description='Dataset installer script.')
    cli.add_argument(
        'data_home',
        type=str,
        nargs='?',
        default=None,
        help='Path to data home directory. If None, use the default path defined in src/config/config.py',
    )
    cli.add_argument(
        '--nodown',
        '-nd',
        action='store_true',
        help='Flag to control downloading. If set, no downloading will occur.',
    )
    parsed = cli.parse_args()

    data_home = parsed.data_home
    if data_home is None:
        # Import the project config only when no path was given on the
        # command line.
        from config.config import shared_cfg
        data_home = shared_cfg["PATH"]["data_home"]

    # Make sure the target directory exists before showing the menu.
    os.makedirs(data_home, exist_ok=True)
    no_download = parsed.nodown
    option_prompt(data_home, no_download)