xu song
committed on
Commit
•
5658533
1
Parent(s):
931d3ff
update
Browse files- models/cpp_qwen2.py +3 -4
models/cpp_qwen2.py
CHANGED
@@ -47,8 +47,7 @@ class Qwen2Simulator(Simulator):
|
|
47 |
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
|
48 |
n_ctx=config.MAX_SEQUENCE_LENGTH, #
|
49 |
# n_threads=None, # 默认会根据cpu数来设置 n_threads
|
50 |
-
|
51 |
-
use_mlock=True,
|
52 |
verbose=True,
|
53 |
)
|
54 |
else:
|
@@ -58,7 +57,7 @@ class Qwen2Simulator(Simulator):
|
|
58 |
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
|
59 |
filename="*fp16.gguf",
|
60 |
n_ctx=config.MAX_SEQUENCE_LENGTH,
|
61 |
-
use_mlock=True,
|
62 |
verbose=False,
|
63 |
)
|
64 |
logger.info(f"llm has been initialized: {self.llm}, "
|
@@ -150,6 +149,6 @@ if __name__ == "__main__":
|
|
150 |
|
151 |
message = {"role": "user", "content": generated_text}
|
152 |
print(message)
|
153 |
-
for generated_text, all_tokens in bot.
|
154 |
print(generated_text, all_tokens)
|
155 |
|
|
|
47 |
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
|
48 |
n_ctx=config.MAX_SEQUENCE_LENGTH, #
|
49 |
# n_threads=None, # 默认会根据cpu数来设置 n_threads
|
50 |
+
# use_mlock=True,
|
|
|
51 |
verbose=True,
|
52 |
)
|
53 |
else:
|
|
|
57 |
tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
|
58 |
filename="*fp16.gguf",
|
59 |
n_ctx=config.MAX_SEQUENCE_LENGTH,
|
60 |
+
# use_mlock=True,
|
61 |
verbose=False,
|
62 |
)
|
63 |
logger.info(f"llm has been initialized: {self.llm}, "
|
|
|
149 |
|
150 |
message = {"role": "user", "content": generated_text}
|
151 |
print(message)
|
152 |
+
for generated_text, all_tokens in bot.generate_response(message, all_tokens, stream=True):
|
153 |
print(generated_text, all_tokens)
|
154 |
|