from llama_cpp import Llama
from transformers import AutoTokenizer

# Placeholder for the local Q5-quantized GGUF file; substitute your own path
gguf_path = "<<gguf_Q5_path>>"
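# A minimal sketch of one way to obtain such a file, assuming a GGUF
# conversion of Breeze-7B is hosted on the Hugging Face Hub. The repo_id
# and filename below are hypothetical placeholders, not a confirmed upload:
# from huggingface_hub import hf_hub_download
# gguf_path = hf_hub_download(
#     repo_id="your-org/Breeze-7B-Instruct-v1_0-GGUF",   # hypothetical repo
#     filename="breeze-7b-instruct-v1_0.Q5_K_M.gguf",    # hypothetical filename
# )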

# Load the quantized model with a 4096-token context window
llm = Llama(model_path=gguf_path, n_ctx=4096)

# The original HF tokenizer supplies Breeze's chat template for prompt formatting
tokenizer = AutoTokenizer.from_pretrained("MediaTek-Research/Breeze-7B-Instruct-v1_0")


def QA(i):
    # The question (Traditional Chinese): "Besides using antivirus software,
    # what other ways are there to protect yourself from malware?"
    messages = [
        {
            "role": "user",
            "content": "除了使用防毒軟體,還有哪些方法可以保護自己免受惡意軟體的侵害?",
        }
    ]
    # Render the messages through the model's chat template; add_generation_prompt
    # asks the template to append its assistant-turn marker (if it defines one)
    # so the model answers rather than continuing the user's text
    question = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = llm(
        prompt=question,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        presence_penalty=1,
        frequency_penalty=1,
    )
    answer = output['choices'][0]['text']
    print(f"--- round {i} ---")
    print(answer)
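
# Alternative sketch, assuming the GGUF file embeds a chat template (recent
# conversions usually do): llama-cpp-python's create_chat_completion can then
# format the prompt itself, making the separate HF tokenizer step unnecessary.
# QA_chat is a hypothetical helper, not part of the original script.
def QA_chat(i):
    output = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": "除了使用防毒軟體,還有哪些方法可以保護自己免受惡意軟體的侵害?",
            }
        ],
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
    )
    print(output["choices"][0]["message"]["content"])
    # (swap QA for QA_chat in the loop below to try it)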


if __name__ == '__main__':
    # Ask the same question 10 times to eyeball the consistency of the answers
    for i in range(10):
        QA(i)
    print("done")