xu-song committed on
Commit
6bdf6c6
1 Parent(s): 9820e00
Files changed (2) hide show
  1. README.md +0 -3
  2. vocab/chatglm_6b/__init__.py +7 -11
README.md CHANGED
@@ -18,9 +18,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
18
  ## TODO
19
 
20
 
21
- - 'MossTokenizer' object has no attribute 'encoder'
22
- - chatglmTokenizer
23
-
24
 
25
 
26
 
 
18
  ## TODO
19
 
20
 
 
 
 
21
 
22
 
23
 
vocab/chatglm_6b/__init__.py CHANGED
@@ -6,17 +6,13 @@ import os
6
  import config
7
  from transformers import AutoTokenizer
8
 
9
-
10
-
11
-
12
- # if config.USE_REMOTE:
13
- tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
14
- # else:
15
- # os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
16
- # CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
17
- # TOKENIZER_DIR = os.path.join(CURRENT_DIR, "chatglm_6b")
18
- # tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
19
 
20
  # https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py#L153
21
  tokenizer.comments = f"num_image_tokens: {tokenizer.sp_tokenizer.num_image_tokens}; num_image_tokens: {tokenizer.sp_tokenizer.num_text_tokens} "
22
-
 
6
  import config
7
  from transformers import AutoTokenizer
8
 
9
+ if False: # 有bug
10
+ tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
11
+ else:
12
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
13
+ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
14
+ TOKENIZER_DIR = os.path.join(CURRENT_DIR, "chatglm_6b")
15
+ tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
 
 
 
16
 
17
  # https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py#L153
18
  tokenizer.comments = f"num_image_tokens: {tokenizer.sp_tokenizer.num_image_tokens}; num_image_tokens: {tokenizer.sp_tokenizer.num_text_tokens} "