Spaces:
Running
Running
update
Browse files
- README.md +0 -3
- vocab/chatglm_6b/__init__.py +7 -11
README.md
CHANGED
@@ -18,9 +18,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
18 |
## TODO
|
19 |
|
20 |
|
21 |
-
- 'MossTokenizer' object has no attribute 'encoder'
|
22 |
-
- chatglmTokenizer
|
23 |
-
|
24 |
|
25 |
|
26 |
|
|
|
18 |
## TODO
|
19 |
|
20 |
|
|
|
|
|
|
|
21 |
|
22 |
|
23 |
|
vocab/chatglm_6b/__init__.py
CHANGED
@@ -6,17 +6,13 @@ import os
|
|
6 |
import config
|
7 |
from transformers import AutoTokenizer
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
# CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
17 |
-
# TOKENIZER_DIR = os.path.join(CURRENT_DIR, "chatglm_6b")
|
18 |
-
# tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
|
19 |
|
20 |
# https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py#L153
|
21 |
tokenizer.comments = f"num_image_tokens: {tokenizer.sp_tokenizer.num_image_tokens}; num_image_tokens: {tokenizer.sp_tokenizer.num_text_tokens} "
|
22 |
-
|
|
|
6 |
import config
|
7 |
from transformers import AutoTokenizer
|
8 |
|
9 |
+
if False: # has a bug
|
10 |
+
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
11 |
+
else:
|
12 |
+
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
|
13 |
+
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
14 |
+
TOKENIZER_DIR = os.path.join(CURRENT_DIR, "chatglm_6b")
|
15 |
+
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
|
|
|
|
|
|
|
16 |
|
17 |
# https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py#L153
|
18 |
tokenizer.comments = f"num_image_tokens: {tokenizer.sp_tokenizer.num_image_tokens}; num_image_tokens: {tokenizer.sp_tokenizer.num_text_tokens} "
|
|