zxdu20 committed on
Commit
a7272d4
1 Parent(s): 96de7a2

Fix logit processor

Browse files

Fix tokenizer config saving

Files changed (2) hide show
  1. modeling_chatglm.py +1 -1
  2. tokenization_chatglm.py +9 -3
modeling_chatglm.py CHANGED
@@ -56,7 +56,7 @@ class InvalidScoreLogitsProcessor(LogitsProcessor):
56
  def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
57
  if torch.isnan(scores).any() or torch.isinf(scores).any():
58
  scores.zero_()
59
- scores[..., 20005] = 5e4
60
  return scores
61
 
62
 
 
56
  def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
57
  if torch.isnan(scores).any() or torch.isinf(scores).any():
58
  scores.zero_()
59
+ scores[..., 5] = 5e4
60
  return scores
61
 
62
 
tokenization_chatglm.py CHANGED
@@ -170,9 +170,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
170
  vocab_file,
171
  do_lower_case=False,
172
  remove_space=False,
173
- bos_token='sop',
174
- eos_token='eos',
175
- eop_token='eop',
176
  mask_token='[MASK]',
177
  gmask_token='[gMASK]',
178
  padding_side="left",
@@ -183,6 +183,12 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
183
  do_lower_case=do_lower_case,
184
  remove_space=remove_space,
185
  padding_side=padding_side,
 
 
 
 
 
 
186
  **kwargs
187
  )
188
 
 
170
  vocab_file,
171
  do_lower_case=False,
172
  remove_space=False,
173
+ bos_token='<sop>',
174
+ eos_token='</s>',
175
+ eop_token='<eop>',
176
  mask_token='[MASK]',
177
  gmask_token='[gMASK]',
178
  padding_side="left",
 
183
  do_lower_case=do_lower_case,
184
  remove_space=remove_space,
185
  padding_side=padding_side,
186
+ bos_token=bos_token,
187
+ eos_token=eos_token,
188
+ eop_token=eop_token,
189
+ mask_token=mask_token,
190
+ gmask_token=gmask_token,
191
+ num_image_tokens=num_image_tokens,
192
  **kwargs
193
  )
194