x54-729 committed on
Commit
43367a7
1 Parent(s): add638c

Fix InternLMTokenizer

Browse files
Files changed (1) hide show
  1. tokenization_internlm.py +2 -2
tokenization_internlm.py CHANGED
@@ -65,6 +65,8 @@ class InternLMTokenizer(PreTrainedTokenizer):
65
  **kwargs,
66
  ):
67
  self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
 
 
68
  super().__init__(
69
  bos_token=bos_token,
70
  eos_token=eos_token,
@@ -77,8 +79,6 @@ class InternLMTokenizer(PreTrainedTokenizer):
77
  self.add_bos_token = add_bos_token
78
  self.add_eos_token = add_eos_token
79
  self.decode_with_prefix_space = decode_with_prefix_space
80
- self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
81
- self.sp_model.Load(vocab_file)
82
  self._no_prefix_space_tokens = None
83
 
84
  """ Initialisation"""
 
65
  **kwargs,
66
  ):
67
  self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
68
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
69
+ self.sp_model.Load(vocab_file)
70
  super().__init__(
71
  bos_token=bos_token,
72
  eos_token=eos_token,
 
79
  self.add_bos_token = add_bos_token
80
  self.add_eos_token = add_eos_token
81
  self.decode_with_prefix_space = decode_with_prefix_space
 
 
82
  self._no_prefix_space_tokens = None
83
 
84
  """ Initialisation"""