zRzRzRzRzRzRzR katuni4ka committed on
Commit
3afaed6
1 Parent(s): 67d005d

update tokenizer for compatibility with new transformers (#64)

Browse files

- update tokenizer for compatibility with new transformers (bbb5eacef5efb6b1d0b75c89296f576417bed4e1)


Co-authored-by: Ekaterina Aidova <[email protected]>

Files changed (1) hide show
  1. tokenization_chatglm.py +3 -0
tokenization_chatglm.py CHANGED
@@ -271,6 +271,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
271
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
272
  pad_to_multiple_of: Optional[int] = None,
273
  return_attention_mask: Optional[bool] = None,
 
 
274
  ) -> dict:
275
  """
276
  Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
@@ -297,6 +299,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
297
  """
298
  # Load from model defaults
299
  assert self.padding_side == "left"
 
300
 
301
  required_input = encoded_inputs[self.model_input_names[0]]
302
  seq_length = len(required_input)
 
271
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
272
  pad_to_multiple_of: Optional[int] = None,
273
  return_attention_mask: Optional[bool] = None,
274
+ padding_side: Optional[bool] = None,
275
+ **kwargs
276
  ) -> dict:
277
  """
278
  Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
 
299
  """
300
  # Load from model defaults
301
  assert self.padding_side == "left"
302
+ assert padding_side is None or padding_side == "left"
303
 
304
  required_input = encoded_inputs[self.model_input_names[0]]
305
  seq_length = len(required_input)