update tokenization.py
Browse files- tokenization_qwen.py +2 -1
tokenization_qwen.py
CHANGED
@@ -42,6 +42,7 @@ SPECIAL_TOKENS = tuple(
|
|
42 |
start=SPECIAL_START_ID,
|
43 |
)
|
44 |
)
|
|
|
45 |
|
46 |
|
47 |
def _load_tiktoken_bpe(tiktoken_bpe_file: str) -> Dict[bytes, int]:
|
@@ -160,7 +161,7 @@ class QWenTokenizer(PreTrainedTokenizer):
|
|
160 |
raise ValueError("Adding regular tokens is not supported")
|
161 |
for token in new_tokens:
|
162 |
surface_form = token.content if isinstance(token, AddedToken) else token
|
163 |
-
if surface_form not in SPECIAL_TOKENS:
|
164 |
raise ValueError("Adding unknown special tokens is not supported")
|
165 |
return 0
|
166 |
|
|
|
42 |
start=SPECIAL_START_ID,
|
43 |
)
|
44 |
)
|
45 |
+
SPECIAL_TOKENS_SET = set(t for i, t in SPECIAL_TOKENS)
|
46 |
|
47 |
|
48 |
def _load_tiktoken_bpe(tiktoken_bpe_file: str) -> Dict[bytes, int]:
|
|
|
161 |
raise ValueError("Adding regular tokens is not supported")
|
162 |
for token in new_tokens:
|
163 |
surface_form = token.content if isinstance(token, AddedToken) else token
|
164 |
+
if surface_form not in SPECIAL_TOKENS_SET:
|
165 |
raise ValueError("Adding unknown special tokens is not supported")
|
166 |
return 0
|
167 |
|