Upload README.md
README.md CHANGED

```diff
@@ -167,7 +167,8 @@ from transformers import AutoTokenizer
 model_name_or_path = "TheBloke/Llama-2-7b-Chat-AWQ"
 
 # Load model
-model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
+model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
+                                          trust_remote_code=False, safetensors=True)
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)
 
 prompt = "Tell me about AI"
@@ -195,9 +196,10 @@ generation_output = model.generate(
     max_new_tokens=512
 )
 
-print("Output: ", tokenizer.decode(
+print("Output: ", tokenizer.decode(generation_output[0]))
 
 # Inference can also be done using transformers' pipeline
+from transformers import pipeline
 
 print("*** Pipeline:")
 pipe = pipeline(
```
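Taken together, the two hunks repair the README's Python example: the `from_quantized(...)` call regains its truncated second line, the unterminated `print(...)` is completed, and the missing `pipeline` import is added. Below is a minimal sketch of the corrected snippet as it should run after this commit; the `awq` import, the `.cuda()` tokenization step, and the omission of sampling arguments are assumptions based on the standard AutoAWQ usage pattern, since they fall outside the diff context.

```python
from awq import AutoAWQForCausalLM  # assumed: AutoAWQ package providing from_quantized()
from transformers import AutoTokenizer, pipeline

model_name_or_path = "TheBloke/Llama-2-7b-Chat-AWQ"

# Load the AWQ-quantized model (the commit restores the second line of this call)
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
                                          trust_remote_code=False, safetensors=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)

prompt = "Tell me about AI"

# Tokenize and generate; only max_new_tokens=512 is visible in the diff,
# so any other sampling arguments are left out here
tokens = tokenizer(prompt, return_tensors="pt").input_ids.cuda()  # assumed GPU placement
generation_output = model.generate(tokens, max_new_tokens=512)

# The commit completes this previously truncated line
print("Output: ", tokenizer.decode(generation_output[0]))
```

The added `from transformers import pipeline` line is what the `pipe = pipeline(` example further down the README relies on; without it, that section would raise a `NameError`.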