xu song committed
Commit 931d3ff
Parent(s): 55f05a7
update

models/cpp_qwen2.py CHANGED (+7 -4)
@@ -4,7 +4,7 @@ https://github.com/awinml/llama-cpp-python-bindings
 
 python convert_hf_to_gguf.py --outtype f16 Qwen1.5-0.5B-Chat
 
-python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/
+python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/
 
 
 ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "I believe the meaning of life is" -n 128
@@ -37,15 +37,17 @@ import config
 class Qwen2Simulator(Simulator):
 
     def __init__(self):
-        self.hf_tokenizer = AutoTokenizer.from_pretrained(
-            "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
-
         local_path = "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf"
         if os.path.exists(local_path):
+            self.hf_tokenizer = AutoTokenizer.from_pretrained(
+                "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
             self.llm = llama_cpp.Llama(  # n_ctx, n_threads
                 model_path=local_path,
+                # the default tokenizer is buggy: it produces different ids after tokenization
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 n_ctx=config.MAX_SEQUENCE_LENGTH,  #
                 # n_threads=None,  # by default, n_threads is set from the CPU count
+
                 use_mlock=True,
                 verbose=True,
             )
@@ -53,6 +55,7 @@ class Qwen2Simulator(Simulator):
             self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
             self.llm = llama_cpp.Llama.from_pretrained(
                 repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 filename="*fp16.gguf",
                 n_ctx=config.MAX_SEQUENCE_LENGTH,
                 use_mlock=True,
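
Note: the LlamaHFTokenizer wiring added in this commit works around the issue the inline comment describes: the tokenizer built into the GGUF file can emit different token ids than the original Hugging Face tokenizer for the same text. Below is a minimal sketch to check for such a mismatch, assuming llama-cpp-python and transformers are installed and reusing the same hub repo ids as the diff; the test string is illustrative.

# Sketch: compare the GGUF's built-in tokenizer against the HF tokenizer.
import llama_cpp
from transformers import AutoTokenizer

hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
llm = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
    filename="*fp16.gguf",
)

text = "I believe the meaning of life is"
# Qwen2 adds no BOS token by default, so disable it on the llama.cpp side too.
gguf_ids = llm.tokenize(text.encode("utf-8"), add_bos=False, special=False)
hf_ids = hf_tokenizer.encode(text)
# If these differ, the built-in tokenizer is the likely culprit, and passing
# tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(hf_tokenizer) avoids it.
print(gguf_ids == hf_ids)

LlamaHFTokenizer also exposes a from_pretrained classmethod, so the wrapper can be built directly from the hub id when a separate AutoTokenizer instance is not otherwise needed.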