SmerkyG committed on
Commit
f2ca022
1 Parent(s): 49f029b

Update tokenization_rwkv_world.py

Browse files
Files changed (1) hide show
  1. tokenization_rwkv_world.py +3 -3
tokenization_rwkv_world.py CHANGED
@@ -93,7 +93,7 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
93
  vocab_files_names = VOCAB_FILES_NAMES
94
  model_input_names = ["input_ids", "attention_mask"]
95
 
96
- def __init__(self, vocab_file, errors="replace", pad_token="\n", **kwargs):
97
  self.add_bos_token = False
98
  self.encoder = {}
99
  sorted = [] # must be already sorted
@@ -133,7 +133,7 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
133
 
134
  @property
135
  def pad_token_id(self) -> Optional[int]:
136
- return 11
137
 
138
  @property
139
  def vocab_size(self):
@@ -316,7 +316,7 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
316
  verbose: bool = True,
317
  **kwargs,
318
  ) -> BatchEncoding:
319
- def get_input_ids(text, max_length=None, pad_token_id=11):
320
  def pad_sequence(seq, max_len, pad_tok):
321
  return [pad_tok] * (max_len - len(seq)) + seq
322
 
 
93
  vocab_files_names = VOCAB_FILES_NAMES
94
  model_input_names = ["input_ids", "attention_mask"]
95
 
96
+ def __init__(self, vocab_file, errors="replace", pad_token="0", **kwargs):
97
  self.add_bos_token = False
98
  self.encoder = {}
99
  sorted = [] # must be already sorted
 
133
 
134
  @property
135
  def pad_token_id(self) -> Optional[int]:
136
+ return 0
137
 
138
  @property
139
  def vocab_size(self):
 
316
  verbose: bool = True,
317
  **kwargs,
318
  ) -> BatchEncoding:
319
+ def get_input_ids(text, max_length=None, pad_token_id=0):
320
  def pad_sequence(seq, max_len, pad_tok):
321
  return [pad_tok] * (max_len - len(seq)) + seq
322