xu song committed on
Commit
931d3ff
1 Parent(s): 55f05a7
Files changed (1) hide show
  1. models/cpp_qwen2.py +7 -4
models/cpp_qwen2.py CHANGED
@@ -4,7 +4,7 @@ https://github.com/awinml/llama-cpp-python-bindings
4
 
5
  python convert_hf_to_gguf.py --outtype f16 Qwen1.5-0.5B-Chat
6
 
7
- python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/
8
 
9
 
10
  ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "I believe the meaning of life is" -n 128
@@ -37,15 +37,17 @@ import config
37
  class Qwen2Simulator(Simulator):
38
 
39
  def __init__(self):
40
- self.hf_tokenizer = AutoTokenizer.from_pretrained(
41
- "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
42
-
43
  local_path = "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf"
44
  if os.path.exists(local_path):
 
 
45
  self.llm = llama_cpp.Llama( # n_ctx, n_threads
46
  model_path=local_path,
 
 
47
  n_ctx=config.MAX_SEQUENCE_LENGTH, #
48
  # n_threads=None, # 默认会根据cpu数来设置 n_threads
 
49
  use_mlock=True,
50
  verbose=True,
51
  )
@@ -53,6 +55,7 @@ class Qwen2Simulator(Simulator):
53
  self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
54
  self.llm = llama_cpp.Llama.from_pretrained(
55
  repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
 
56
  filename="*fp16.gguf",
57
  n_ctx=config.MAX_SEQUENCE_LENGTH,
58
  use_mlock=True,
 
4
 
5
  python convert_hf_to_gguf.py --outtype f16 Qwen1.5-0.5B-Chat
6
 
7
+ python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/
8
 
9
 
10
  ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "I believe the meaning of life is" -n 128
 
37
  class Qwen2Simulator(Simulator):
38
 
39
  def __init__(self):
 
 
 
40
  local_path = "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf"
41
  if os.path.exists(local_path):
42
+ self.hf_tokenizer = AutoTokenizer.from_pretrained(
43
+ "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
44
  self.llm = llama_cpp.Llama( # n_ctx, n_threads
45
  model_path=local_path,
46
+ # 默认的tokenizer有bug,tokenize后的id不同
47
+ tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
48
  n_ctx=config.MAX_SEQUENCE_LENGTH, #
49
  # n_threads=None, # 默认会根据cpu数来设置 n_threads
50
+
51
  use_mlock=True,
52
  verbose=True,
53
  )
 
55
  self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
56
  self.llm = llama_cpp.Llama.from_pretrained(
57
  repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
58
+ tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
59
  filename="*fp16.gguf",
60
  n_ctx=config.MAX_SEQUENCE_LENGTH,
61
  use_mlock=True,