oweller2
committed on
Commit
•
340e438
1
Parent(s):
70bb53a
remove changes:
Browse files
- tokenizer.py +8 -7
tokenizer.py
CHANGED
@@ -7,13 +7,14 @@ class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
|
|
7 |
def _batch_encode_plus(self, *args, **kwargs):
|
8 |
outputs = super()._batch_encode_plus(*args, **kwargs)
|
9 |
del outputs["token_type_ids"]
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
17 |
return outputs
|
18 |
|
19 |
# Register the class
|
|
|
7 |
def _batch_encode_plus(self, *args, **kwargs):
|
8 |
outputs = super()._batch_encode_plus(*args, **kwargs)
|
9 |
del outputs["token_type_ids"]
|
10 |
+
# if the last token is eos, remove it
|
11 |
+
# for key in ['input_ids', 'attention_mask']:
|
12 |
+
# if isinstance(outputs[key], torch.Tensor):
|
13 |
+
# outputs[key] = outputs[key][..., :-1]
|
14 |
+
# elif isinstance(outputs[key], numpy.ndarray):
|
15 |
+
# outputs[key] = outputs[key][..., :-1]
|
16 |
+
# elif isinstance(outputs[key], list):
|
17 |
+
# outputs[key] = [sequence[:-1] for sequence in outputs[key]]
|
18 |
return outputs
|
19 |
|
20 |
# Register the class
|