oweller2 committed on
Commit
340e438
1 Parent(s): 70bb53a

remove changes:

Browse files
Files changed (1) hide show
  1. tokenizer.py +8 -7
tokenizer.py CHANGED
@@ -7,13 +7,14 @@ class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
7
  def _batch_encode_plus(self, *args, **kwargs):
8
  outputs = super()._batch_encode_plus(*args, **kwargs)
9
  del outputs["token_type_ids"]
10
- for key in ['input_ids', 'attention_mask']:
11
- if isinstance(outputs[key], torch.Tensor):
12
- outputs[key] = outputs[key][..., :-1]
13
- elif isinstance(outputs[key], numpy.ndarray):
14
- outputs[key] = outputs[key][..., :-1]
15
- elif isinstance(outputs[key], list):
16
- outputs[key] = [sequence[:-1] for sequence in outputs[key]]
 
17
  return outputs
18
 
19
  # Register the class
 
7
  def _batch_encode_plus(self, *args, **kwargs):
8
  outputs = super()._batch_encode_plus(*args, **kwargs)
9
  del outputs["token_type_ids"]
10
+ # if the last token is eos, remove it
11
+ # for key in ['input_ids', 'attention_mask']:
12
+ # if isinstance(outputs[key], torch.Tensor):
13
+ # outputs[key] = outputs[key][..., :-1]
14
+ # elif isinstance(outputs[key], numpy.ndarray):
15
+ # outputs[key] = outputs[key][..., :-1]
16
+ # elif isinstance(outputs[key], list):
17
+ # outputs[key] = [sequence[:-1] for sequence in outputs[key]]
18
  return outputs
19
 
20
  # Register the class