KaleiNeely
committed on
Commit
•
9b7bad7
1
Parent(s):
e6f6633
Update tokenization_rwkv_world.py
Browse files
tokenization_rwkv_world.py
CHANGED
@@ -122,15 +122,19 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
|
|
122 |
errors=errors,
|
123 |
**kwargs,
|
124 |
)
|
125 |
-
|
126 |
@property
|
127 |
def eos_token_id(self) -> Optional[int]:
|
128 |
return 0
|
129 |
-
|
130 |
@property
|
131 |
def eot_token_id(self) -> Optional[int]:
|
132 |
return 0
|
133 |
|
|
|
|
|
|
|
|
|
134 |
@property
|
135 |
def vocab_size(self):
|
136 |
return len(self.encoder)
|
@@ -211,7 +215,7 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
|
|
211 |
return tokens
|
212 |
|
213 |
def decodeBytes(self, tokens):
|
214 |
-
return b
|
215 |
|
216 |
def _tokenize(self, text, **kwargs):
|
217 |
"""Tokenize a string."""
|
|
|
122 |
errors=errors,
|
123 |
**kwargs,
|
124 |
)
|
125 |
+
|
126 |
@property
|
127 |
def eos_token_id(self) -> Optional[int]:
|
128 |
return 0
|
129 |
+
|
130 |
@property
|
131 |
def eot_token_id(self) -> Optional[int]:
|
132 |
return 0
|
133 |
|
134 |
+
@property
|
135 |
+
def pad_token_id(self) -> Optional[int]:
|
136 |
+
return 0
|
137 |
+
|
138 |
@property
|
139 |
def vocab_size(self):
|
140 |
return len(self.encoder)
|
|
|
215 |
return tokens
|
216 |
|
217 |
def decodeBytes(self, tokens):
|
218 |
+
return b"".join(map(lambda i: self.encoder[i], tokens)) # noqa
|
219 |
|
220 |
def _tokenize(self, text, **kwargs):
|
221 |
"""Tokenize a string."""
|