|
from transformers import T5Tokenizer |
|
from typing import List, Optional, Tuple, Union |
|
|
|
class OpenMoeTokenizer(T5Tokenizer):
    """T5 tokenizer variant with left padding and a pad-token prefix.

    After the base constructor runs, the instance is forced to:

    * ``padding_side = 'left'``
    * ``add_bos_token = True``  -- the pad token id is prepended to the
      first segment and thereby acts as the beginning-of-sequence marker
    * ``add_eos_token = False`` -- no EOS handling for the first segment
    """

    def __init__(self, *args, **kwargs):
        """Build the base tokenizer, then apply this class's fixed settings.

        All positional and keyword arguments are forwarded untouched to the
        parent constructor.
        """
        super().__init__(*args, **kwargs)
        # Assigned after the parent constructor, so these values replace
        # whatever the parent set on the same attributes.
        self.padding_side = 'left'
        self.add_bos_token = True
        self.add_eos_token = False

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """Assemble model input ids for a single sequence or a pair.

        Args:
            token_ids_0: Token ids of the first (or only) segment.
            token_ids_1: Token ids of the optional second segment.

        Returns:
            The first segment, passed through ``_add_eos_if_not_present``
            when ``add_eos_token`` is set and prefixed with ``pad_token_id``
            when ``add_bos_token`` is set; followed, if a second segment was
            given, by that segment passed through
            ``_add_eos_if_not_present``.
        """
        # ``_add_eos_if_not_present`` is not defined in this class --
        # presumably inherited from the T5 tokenizer base; verify.
        first_segment = token_ids_0
        if self.add_eos_token:
            first_segment = self._add_eos_if_not_present(first_segment)
        if self.add_bos_token:
            # The pad token id stands in as the BOS marker.
            first_segment = [self.pad_token_id] + first_segment

        if token_ids_1 is not None:
            # NOTE(review): the second segment goes through the EOS helper
            # unconditionally, i.e. independent of ``add_eos_token`` --
            # confirm this asymmetry is intended.
            return first_segment + self._add_eos_if_not_present(token_ids_1)
        return first_segment