xu song committed
Commit 931d3ff
Parent(s): 55f05a7
update

models/cpp_qwen2.py CHANGED (+7 -4)
@@ -4,7 +4,7 @@ https://github.com/awinml/llama-cpp-python-bindings
 
 python convert_hf_to_gguf.py --outtype f16 Qwen1.5-0.5B-Chat
 
-python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/
+python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/
 
 
 ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "I believe the meaning of life is" -n 128
@@ -37,15 +37,17 @@ import config
 class Qwen2Simulator(Simulator):
 
     def __init__(self):
-        self.hf_tokenizer = AutoTokenizer.from_pretrained(
-            "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
-
         local_path = "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf"
         if os.path.exists(local_path):
+            self.hf_tokenizer = AutoTokenizer.from_pretrained(
+                "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
             self.llm = llama_cpp.Llama(  # n_ctx, n_threads
                 model_path=local_path,
+                # the default tokenizer is buggy: it produces different ids after tokenization
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 n_ctx=config.MAX_SEQUENCE_LENGTH,  #
                 # n_threads=None,  # by default, n_threads is set from the CPU count
+
                 use_mlock=True,
                 verbose=True,
             )
@@ -53,6 +55,7 @@ class Qwen2Simulator(Simulator):
             self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
             self.llm = llama_cpp.Llama.from_pretrained(
                 repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
+                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 filename="*fp16.gguf",
                 n_ctx=config.MAX_SEQUENCE_LENGTH,
                 use_mlock=True,
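
Note: the LlamaHFTokenizer wiring added in this commit works around the issue the inline comment describes: the tokenizer built into the GGUF file can emit different token ids than the original Hugging Face tokenizer for the same text. Below is a minimal sketch to check for such a mismatch, assuming llama-cpp-python and transformers are installed and reusing the same hub repo ids as the diff; the test string is illustrative.

# Sketch: compare the GGUF's built-in tokenizer against the HF tokenizer.
import llama_cpp
from transformers import AutoTokenizer

hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
llm = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
    filename="*fp16.gguf",
)

text = "I believe the meaning of life is"
# Qwen2 adds no BOS token by default, so disable it on the llama.cpp side too.
gguf_ids = llm.tokenize(text.encode("utf-8"), add_bos=False, special=False)
hf_ids = hf_tokenizer.encode(text)
# If these differ, the built-in tokenizer is the likely culprit, and passing
# tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(hf_tokenizer) avoids it.
print(gguf_ids == hf_ids)

LlamaHFTokenizer also exposes a from_pretrained classmethod, so the wrapper can be built directly from the hub id when a separate AutoTokenizer instance is not otherwise needed.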