Spaces:
Build error
Build error
File size: 12,636 Bytes
a03c9b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 |
# Copyright 2024 The YourMT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Please see the details in the LICENSE file.
""" event_codec.py: Encodes and decodes events to/from indices
🚀 Improvements:
• Encoding uses a precomputed dictionary in Python. This achieves a time
complexity of O(1).
• Decoding has time complexity of O(1), while the original code from MT3
(Gardner et al.) has a time complexity of O(n).
In practice, the performance of this optimized code was 4x faster for encoding
and decoding compared to the original code.
"""
from typing import List, Dict, Tuple, Optional
from utils.note_event_dataclasses import Event, EventRange
# from bisect import bisect_right
class FastCodec:
    """Fast encoding and decoding of Events to/from integer indices.

    All (event type, value) pairs are enumerated once in the constructor and
    stored in two dictionaries, so encoding and decoding are plain dictionary
    lookups.
    """

    def __init__(self,
                 special_tokens: List[str],
                 max_shift_steps: int,
                 event_ranges: List[EventRange],
                 program_vocabulary: Optional[Dict] = None,
                 drum_vocabulary: Optional[Dict] = None,
                 extra_tokens: Optional[List[str]] = None,
                 name: Optional[str] = None):
        """ Initializes the FastCodec object.

        :param special_tokens: List of special tokens to include in the vocabulary.
            Each one occupies a single index at the start of the vocabulary.
        :param max_shift_steps: The maximum number of steps to shift. The 'shift'
            event takes values in [0, max_shift_steps - 1].
        :param event_ranges: List of EventRange objects.
        :param program_vocabulary: A dictionary of instrument groups. Please see config/vocabulary.py
            We apply vocabulary only for encoding in training.
        :param drum_vocabulary: A dictionary of drum mapping. Please see config/vocabulary.py
            We apply vocabulary only for encoding in training.
        :param extra_tokens: Optional list of additional single-index tokens that are
            appended after all other event ranges. Defaults to no extra tokens.
        :param name: Name of the codec.
        """
        # Use None as the default instead of a shared mutable list.
        if extra_tokens is None:
            extra_tokens = []

        # Store the special tokens and event ranges.
        self.special_tokens = special_tokens
        self._special_token_ranges = [EventRange(token, 0, 0) for token in special_tokens]
        self._extra_token_ranges = [EventRange(token, 0, 0) for token in extra_tokens]
        self._shift_range = EventRange(type='shift', min_value=0, max_value=max_shift_steps - 1)
        # Vocabulary layout: special tokens, shift, user event ranges, extra tokens.
        self._event_ranges = (self._special_token_ranges + [self._shift_range] + event_ranges +
                              self._extra_token_ranges)

        # Ensure all event types have unique names.
        assert len(self._event_ranges) == len({er.type for er in self._event_ranges}), \
            'Event type names must be unique.'

        # Store the name of the codec, so that we can identify it in tokenizer.
        self._name = name

        # Create dictionaries for decoding (index -> Event), encoding
        # ((type, value) -> index) and the index span of each event type.
        self._decode_dict = {}
        self._encode_dict = {}
        self._event_type_range_dict = {}
        idx = 0
        for er in self._event_ranges:
            start_idx = idx
            for value in range(er.min_value, er.max_value + 1):
                self._decode_dict[idx] = Event(type=er.type, value=value)
                self._encode_dict[(er.type, value)] = idx
                idx += 1
            # Inclusive [start_idx, end_idx] span covered by this event type.
            self._event_type_range_dict[er.type] = (start_idx, idx - 1)
        self._num_classes = idx

        # Create inverse vocabulary for instrument groups
        if program_vocabulary is not None:
            self.inverse_vocab_program = {}
            self._create_inverse_vocab_program(program_vocabulary)
        else:
            self.inverse_vocab_program = None

        # Create inverse vocabulary for drum mapping
        if drum_vocabulary is not None:
            self.inverse_vocab_drum = {}
            self._create_inverse_vocab_drum(drum_vocabulary)
        else:
            self.inverse_vocab_drum = None

    @property
    def num_classes(self) -> int:
        """Total number of indices in the vocabulary."""
        return self._num_classes

    @staticmethod
    def _invert_vocab(vocab: Dict) -> Dict:
        """Map every member of each vocabulary group to the group's first member."""
        return {member: group[0] for group in vocab.values() for member in group}

    def _create_inverse_vocab_program(self, vocab):
        """Fill `inverse_vocab_program` so each value maps to the first value of its group."""
        self.inverse_vocab_program.update(self._invert_vocab(vocab))

    def _create_inverse_vocab_drum(self, vocab):
        """Fill `inverse_vocab_drum` so each value maps to the first value of its group."""
        self.inverse_vocab_drum.update(self._invert_vocab(vocab))

    def encode_event(self, event: Event) -> int:
        """Encode an event to an index.

        For 'program' and 'drum' events the value is first replaced by its
        representative from the corresponding inverse vocabulary, when one was
        given; values missing from the vocabulary are used unchanged.

        :param event: Event to encode (only `type` and `value` are read).
        :return: Index of the (possibly remapped) event.
        :raises ValueError: If the event, or its remapped value, is not in the codec.
        """
        if (event.type, event.value) not in self._encode_dict:
            raise ValueError(f'Unknown event type: {event.type} or value: {event.value}')

        value = event.value
        if event.type == 'program' and self.inverse_vocab_program is not None:
            # If the event value is not in the vocabulary, use the original value
            value = self.inverse_vocab_program.get(event.value, event.value)
        elif event.type == 'drum' and self.inverse_vocab_drum is not None:
            value = self.inverse_vocab_drum.get(event.value, event.value)

        try:
            return self._encode_dict[(event.type, value)]
        except KeyError:
            # The vocabulary remapped the value to one outside the event range;
            # report it like any other unknown event instead of a bare KeyError.
            raise ValueError(f'Unknown event type: {event.type} or value: {value} '
                             f'(remapped from {event.value})') from None

    def event_type_range(self, event_type: str) -> Tuple[int, int]:
        """Return [min_id, max_id] for an event type.

        :raises ValueError: If the event type is not part of the codec.
        """
        if event_type not in self._event_type_range_dict:
            raise ValueError(f'Unknown event type: {event_type}')
        return self._event_type_range_dict[event_type]

    def decode_event_index(self, index: int) -> Event:
        """Decode an event index to an Event.

        :raises ValueError: If the index is outside [0, num_classes).
        """
        if index < 0 or index >= self.num_classes:
            raise ValueError(f'Unknown event index: {index}')
        decoded_event = self._decode_dict[index]
        # Create a new event with the same type and value, so callers never
        # get a reference to the cached instance.
        return Event(type=decoded_event.type, value=decoded_event.value)
# class FastCodec:
# """ Fast Encoding and decoding Event. """
# def __init__(self,
# special_tokens: List[str],
# max_shift_steps: int,
# event_ranges: List[EventRange],
# name: Optional[str] = None):
# """ Initializes the FastCodec object.
# :param special_tokens: List of special tokens to include in the vocabulary.
# :param max_shift_steps: The maximum number of steps to shift.
# :param event_ranges: List of EventRange objects.
# """
# # Store the special tokens and event ranges.
# self.special_tokens = special_tokens
# self._special_token_ranges = []
# for token in special_tokens:
# self._special_token_ranges.append(EventRange(token, 0, 0))
# self._shift_range = EventRange(
# type='shift', min_value=0, max_value=max_shift_steps - 1)
# self._event_ranges = self._special_token_ranges + [self._shift_range
# ] + event_ranges
# # Ensure all event types have unique names.
# assert len(self._event_ranges) == len(
# set([er.type for er in self._event_ranges]))
# # Precompute cumulative offsets.
# self._cumulative_offsets = [0]
# for er in self._event_ranges:
# self._cumulative_offsets.append(self._cumulative_offsets[-1] +
# er.max_value - er.min_value + 1)
# # Create event type to range and offset mapping.
# self._event_type_to_range_offset = {}
# for er, offset in zip(self._event_ranges, self._cumulative_offsets):
# self._event_type_to_range_offset[er.type] = (er, offset)
# # Store the name of the codec, so that we can identify it in tokenizer.
# self._name = name
# @property
# def num_classes(self) -> int:
# return self._cumulative_offsets[-1]
# def encode_event(self, event: Event) -> int:
# """Encode an event to an index."""
# if event.type not in self._event_type_to_range_offset:
# raise ValueError(f'Unknown event type: {event.type}')
# er, offset = self._event_type_to_range_offset[event.type]
# if not er.min_value <= event.value <= er.max_value:
# raise ValueError(
# f'Event value {event.value} is not within valid range '
# f'[{er.min_value}, {er.max_value}] for type {event.type}')
# return offset + event.value - er.min_value
# def event_type_range(self, event_type: str) -> Tuple[int, int]:
# """Return [min_id, max_id] for an event type."""
# offset = 0
# for er in self._event_ranges:
# if event_type == er.type:
# return offset, offset + (er.max_value - er.min_value)
# offset += er.max_value - er.min_value + 1
# raise ValueError(f'Unknown event type: {event_type}')
# def decode_event_index(self, index: int) -> Event:
# """Decode an event index to an Event."""
# if index < 0 or index >= self.num_classes:
# raise ValueError(f'Unknown event index: {index}')
# # Find the event range using binary search.
# range_idx = bisect_right(self._cumulative_offsets, index) - 1
# er = self._event_ranges[range_idx]
# offset = self._cumulative_offsets[range_idx]
# return Event(type=er.type, value=er.min_value + index - offset)
# Original code
#
# https://github.com/magenta/mt3/blob/main/mt3/event_codec.py
# Copyright 2022 The MT3 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# class Codec:
# """Encode and decode events."""
#
# def __init__(self, special_tokens: List[str], max_shift_steps: int,
# event_ranges: List[EventRange]):
# """Define Codec.
# """
# self._special_token_ranges = []
# for token in special_tokens:
# self._special_token_ranges.append(EventRange(token, 0, 0))
# self._shift_range = EventRange(
# type='shift', min_value=0, max_value=max_shift_steps - 1)
# self._event_ranges = self._special_token_ranges + [self._shift_range
# ] + event_ranges
# # Ensure all event types have unique names.
# assert len(self._event_ranges) == len(
# set([er.type for er in self._event_ranges]))
# @property
# def num_classes(self) -> int:
# return sum(er.max_value - er.min_value + 1 for er in self._event_ranges)
# def encode_event(self, event: Event) -> int:
# """Encode an event to an index."""
# offset = 0
# for er in self._event_ranges:
# if event.type == er.type:
# if not er.min_value <= event.value <= er.max_value:
# raise ValueError(
# f'Event value {event.value} is not within valid range '
# f'[{er.min_value}, {er.max_value}] for type {event.type}'
# )
# return offset + event.value - er.min_value
# offset += er.max_value - er.min_value + 1
# raise ValueError(f'Unknown event type: {event.type}')
# def event_type_range(self, event_type: str) -> Tuple[int, int]:
# """Return [min_id, max_id] for an event type."""
# offset = 0
# for er in self._event_ranges:
# if event_type == er.type:
# return offset, offset + (er.max_value - er.min_value)
# offset += er.max_value - er.min_value + 1
# raise ValueError(f'Unknown event type: {event_type}')
# def decode_event_index(self, index: int) -> Event:
# """Decode an event index to an Event."""
# offset = 0
# for er in self._event_ranges:
# if offset <= index <= offset + er.max_value - er.min_value:
# return Event(type=er.type, value=er.min_value + index - offset)
# offset += er.max_value - er.min_value + 1
# raise ValueError(f'Unknown event index: {index}')
|