File size: 608 Bytes
58627fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from transformers import AutoTokenizer

class RerankerTokenizer():
    """Wrap a HuggingFace tokenizer to jointly encode (question, passage) pairs for reranking."""

    def __init__(self, total_maxlen, base):
        """
        Args:
            total_maxlen: maximum combined token length for one (question, passage) pair.
            base: model name or path forwarded to ``AutoTokenizer.from_pretrained``.
        """
        self.total_maxlen = total_maxlen
        self.tok = AutoTokenizer.from_pretrained(base)

    def tensorize(self, questions, passages):
        """Tokenize parallel sequences of questions and passages into one padded batch.

        Args:
            questions: list or tuple of question strings.
            passages: list or tuple of passage strings, aligned with ``questions``.

        Returns:
            A BatchEncoding of PyTorch tensors ('pt'), padded to the longest
            pair in the batch; pairs exceeding ``total_maxlen`` are truncated
            from the longer member first ('longest_first').

        Raises:
            AssertionError: if ``questions`` or ``passages`` is not a list/tuple.
        """
        # isinstance (vs. the previous `type(x) in [list, tuple]`) also accepts
        # subclasses of list/tuple while keeping the same AssertionError contract.
        assert isinstance(questions, (list, tuple)), type(questions)
        assert isinstance(passages, (list, tuple)), type(passages)

        encoding = self.tok(questions, passages, padding='longest', truncation='longest_first',
                            return_tensors='pt', max_length=self.total_maxlen, add_special_tokens=True)

        return encoding