File size: 830 Bytes
dafd67c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from transformers import T5Tokenizer
from typing import List, Optional, Tuple, Union

class OpenMoeTokenizer(T5Tokenizer):
    """``T5Tokenizer`` subclass that left-pads and uses the pad token as BOS.

    Three attributes are assigned after the base initialiser has run, so
    they replace whatever values the base class set for them:

    * ``padding_side = 'left'``
    * ``add_bos_token = True``: prepend ``pad_token_id`` to the input.
    * ``add_eos_token = False``: do not append an EOS token.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base tokenizer, then apply the fixed settings.

        Args:
            *args: Forwarded unchanged to ``T5Tokenizer.__init__``.
            **kwargs: Forwarded unchanged to ``T5Tokenizer.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.padding_side = 'left'
        self.add_bos_token = True
        self.add_eos_token = False

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """Add the configured special tokens to one or two id sequences.

        Args:
            token_ids_0: Token ids of the first (or only) sequence.
            token_ids_1: Optional token ids of a second sequence.

        Returns:
            A list of ids laid out as ``[pad] + token_ids_0 (+ eos)`` for a
            single sequence, or that followed by ``token_ids_1 (+ eos)`` for
            a pair. The leading ``pad_token_id`` is present only when
            ``add_bos_token`` is true; each EOS is handled by the inherited
            ``_add_eos_if_not_present`` helper and only when
            ``add_eos_token`` is true.
        """
        if self.add_eos_token:
            token_ids_0 = self._add_eos_if_not_present(token_ids_0)
        if self.add_bos_token:
            # The pad token id is used as the beginning-of-sequence marker.
            token_ids_0 = [self.pad_token_id] + token_ids_0
        if token_ids_1 is None:
            return token_ids_0
        # Honour add_eos_token for the second sequence as well; previously
        # EOS was appended here unconditionally, even with the flag off.
        if self.add_eos_token:
            token_ids_1 = self._add_eos_if_not_present(token_ids_1)
        return token_ids_0 + token_ids_1