# edia_lmodels_en/modules/module_languageModel.py
from transformers import AutoTokenizer, AutoModelForMaskedLM
import os

# Disable tokenizer parallelism to avoid deadlocks in the Hugging Face tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

class LanguageModel:
    """Thin wrapper that loads a masked-language model and its tokenizer from the Hugging Face Hub."""

    def __init__(self, model_name) -> None:
        print("Downloading language model...")
        self.__tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.__model = AutoModelForMaskedLM.from_pretrained(model_name)

    def initTokenizer(self) -> AutoTokenizer:
        # Return the tokenizer loaded in __init__.
        return self.__tokenizer

    def initModel(self) -> AutoModelForMaskedLM:
        # Return the masked-LM model loaded in __init__.
        return self.__model
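

# --- Usage sketch (not part of the original module) --------------------------
# A minimal example of how this class might be exercised. The checkpoint name
# "bert-base-uncased" and the torch-based fill-mask check below are illustrative
# assumptions, not requirements of this module.
if __name__ == "__main__":
    import torch

    lm = LanguageModel("bert-base-uncased")  # assumed checkpoint for illustration
    tokenizer = lm.initTokenizer()
    model = lm.initModel()

    # Predict the masked token to confirm the tokenizer and model work together.
    text = f"The capital of France is {tokenizer.mask_token}."
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits

    mask_positions = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
    predicted_ids = logits[0, mask_positions].argmax(dim=-1)
    print(tokenizer.decode(predicted_ids))  # e.g. "paris"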