from transformers import AutoTokenizer


class RerankerTokenizer:
    def __init__(self, total_maxlen, base):
        # total_maxlen: cap on the combined question+passage token length.
        # base: name or path of the pretrained model whose tokenizer to load.
        self.total_maxlen = total_maxlen
        self.tok = AutoTokenizer.from_pretrained(base)

    def tensorize(self, questions, passages):
        # Expect parallel lists (or tuples) of question and passage strings.
        assert type(questions) in [list, tuple], type(questions)
        assert type(passages) in [list, tuple], type(passages)

        # Encode each (question, passage) pair jointly: pad to the longest
        # sequence in the batch and, when a pair exceeds total_maxlen,
        # truncate the longer segment first.
        encoding = self.tok(questions, passages, padding='longest', truncation='longest_first',
                            return_tensors='pt', max_length=self.total_maxlen, add_special_tokens=True)

        return encoding
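

# A minimal usage sketch (not part of the original file), assuming a
# BERT-style checkpoint. The model name and example strings below are
# illustrative placeholders, not values taken from the source.
if __name__ == '__main__':
    tokenizer = RerankerTokenizer(total_maxlen=512, base='bert-base-uncased')

    questions = ['example question one', 'example question two']
    passages = ['an example passage to score against question one',
                'an example passage to score against question two']

    batch = tokenizer.tensorize(questions, passages)
    # Padded/truncated pair encodings, ready to feed to a reranker model.
    print(batch['input_ids'].shape)  # e.g. torch.Size([2, seq_len])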