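# NOTE: the following is residue from a web file viewer, not part of the
# module; it is kept here as a comment so that the file parses as Python:
#   mimbres's picture . a03c9b4 raw history blame 5.71 kB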
# Copyright 2024 The YourMT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Please see the details in the LICENSE file.
"""task.py"""
from config.vocabulary import *
from utils.note_event_dataclasses import Event
# Presets referenced by several tasks below; looked up once and reused.
_GM_DRUMS = drum_vocab_presets["gm"]
_GM_PLUS_PROGRAMS = program_vocab_presets["gm_plus"]
_MT3_FULL_PLUS_PROGRAMS = program_vocab_presets["mt3_full_plus"]

# Task configurations, keyed by task identifier.
#
# Every entry provides:
#   "name"                - task name string (not always equal to the key),
#   "train_program_vocab" - a preset from `program_vocab_presets`,
#   "train_drum_vocab"    - a preset from `drum_vocab_presets`.
# Some entries add subtask-prefix settings or multi-channel decoding settings.
task_cfg = {
    # 11 classes + drum class
    "mt3_midi": {
        "name": "mt3_midi",
        "train_program_vocab": program_vocab_presets["mt3_midi"],
        "train_drum_vocab": _GM_DRUMS,
    },
    # 11 classes + singing + drum class
    "mt3_midi_plus": {
        "name": "mt3_midi_plus",
        "train_program_vocab": program_vocab_presets["mt3_midi_plus"],
        "train_drum_vocab": _GM_DRUMS,
    },
    # 34 classes (except drums) as in MT3 paper
    "mt3_full": {
        "name": "mt3_full",
        "train_program_vocab": program_vocab_presets["mt3_full"],
        "train_drum_vocab": _GM_DRUMS,
    },
    # 34 classes (except drums) as in MT3 paper + singing + drum class
    "mt3_full_plus": {
        "name": "mt3_full_plus",
        "train_program_vocab": _MT3_FULL_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
    },
    # 13 classes + singing + chorus (except drums)
    "gm_ext_plus": {
        "name": "gm_ext_plus",
        "train_program_vocab": program_vocab_presets["gm_ext_plus"],
        "train_drum_vocab": _GM_DRUMS,
    },
    # Subtask-prefixed task with a "singing-only" evaluation prefix in
    # addition to the default "transcribe_all" prefix.
    "singing_v1": {
        "name": "singing",
        "train_program_vocab": _MT3_FULL_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
        "subtask_tokens": ["task", "transcribe_singing", "transcribe_all"],
        "ignore_decoding_tokens": ["task", "transcribe_singing", "transcribe_all"],
        "max_task_token_length": 2,
        "eval_subtask_prefix": {
            "default": [Event("transcribe_all", 0), Event("task", 0)],
            "singing-only": [Event("transcribe_singing", 0), Event("task", 0)],
        },
    },
    # Same scheme as "singing_v1", with an extra "drum-only" evaluation prefix.
    "singing_drum_v1": {
        "name": "singing_drum",
        "train_program_vocab": _MT3_FULL_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
        "subtask_tokens": [
            "task",
            "transcribe_singing",
            "transcribe_drum",
            "transcribe_all",
        ],
        "ignore_decoding_tokens": [
            "task",
            "transcribe_singing",
            "transcribe_drum",
            "transcribe_all",
        ],
        "max_task_token_length": 2,
        "eval_subtask_prefix": {
            "default": [Event("transcribe_all", 0), Event("task", 0)],
            "singing-only": [Event("transcribe_singing", 0), Event("task", 0)],
            "drum-only": [Event("transcribe_drum", 0), Event("task", 0)],
        },
    },
    # Multi-channel decoding task of {11 classes + drums + singing}.
    # "max_note_token_length_per_ch" and "mask_loss_strategy" are
    # multi-channel decoding exclusive parameters.
    "mc13": {
        "name": "mc13",
        "train_program_vocab": _GM_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
        "num_decoding_channels": len(_GM_PLUS_PROGRAMS) + 1,  # 13
        "max_note_token_length_per_ch": 512,
        "mask_loss_strategy": None,
    },
    # Same as "mc13", with 256 note tokens per channel instead of 512.
    "mc13_256": {
        "name": "mc13_256",
        "train_program_vocab": _GM_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
        "num_decoding_channels": len(_GM_PLUS_PROGRAMS) + 1,  # 13
        "max_note_token_length_per_ch": 256,
        "mask_loss_strategy": None,
    },
    # Multi-channel decoding task of {34 classes + drums + singing & chorus}
    # mapped to 13 channels.
    "mc13_full_plus": {
        "name": "mc13_full_plus",
        "train_program_vocab": _MT3_FULL_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
        "program2channel_vocab_source": _GM_PLUS_PROGRAMS,
        "num_decoding_channels": 13,
        "max_note_token_length_per_ch": 512,
        "mask_loss_strategy": None,
    },
    # Same as "mc13_full_plus", with 256 note tokens per channel instead of 512.
    "mc13_full_plus_256": {
        "name": "mc13_full_plus_256",
        "train_program_vocab": _MT3_FULL_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
        "program2channel_vocab_source": _GM_PLUS_PROGRAMS,
        "num_decoding_channels": 13,
        "max_note_token_length_per_ch": 256,
        "mask_loss_strategy": None,
    },
    # Subtask prefix spelled as the token sequence "transcribe", "all", ":".
    # Not set for this task: "ignore_decoding_tokens", "max_task_token_length";
    # "ignore_decoding_tokens_from_and_to" is given instead.
    "exc_v1": {
        "name": "exclusive",
        "train_program_vocab": _MT3_FULL_PLUS_PROGRAMS,
        "train_drum_vocab": _GM_DRUMS,
        "subtask_tokens": ["transcribe", "all", ":"],
        "ignore_decoding_tokens_from_and_to": ["transcribe", ":"],
        "eval_subtask_prefix": {
            # this is the main task that transcribes all instruments
            "default": [Event("transcribe", 0), Event("all", 0), Event(":", 0)],
        },
        "shuffle_subtasks": True,
    },
}