cannot init tokenizer
#4
by
vinnitu
- opened
ValueError: Couldn't instantiate the backend tokenizer from one of:
(1) a tokenizers
library serialization file,
(2) a slow tokenizer instance to convert or
(3) an equivalent slow tokenizer class to instantiate and convert.
You need to have sentencepiece installed to convert a slow tokenizer to a fast one.
import torch
from transformers import AutoModel, AutoTokenizer
MAX_SEQUENCE_LENGTH = 4096
MODEL_NAME_OR_PATH = "markussagen/xlm-roberta-longformer-base-4096"
tokenizer = AutoTokenizer.from_pretrained(
MODEL_NAME_OR_PATH,
max_length=MAX_SEQUENCE_LENGTH,
padding="max_length",
truncation=True,
)