Commit a9df6d8
Parent: ecb75e2
Update README.md
README.md CHANGED
@@ -49,13 +49,18 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+    device_map="auto",
+)
+
 prompt = [
     {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
     {"role": "user", "content": "What's Deep Learning?"},
 ]
-
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-
 inputs = tokenizer.apply_chat_template(
     prompt,
     tokenize=True,

@@ -64,13 +69,6 @@ inputs = tokenizer.apply_chat_template(
     return_dict=True,
 ).to("cuda")
 
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    low_cpu_mem_usage=True,
-    device_map="auto",
-)
-
 outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
 print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 ```
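Net effect of the two hunks above: the tokenizer and model are now loaded right after `model_id`, before the prompt is built, rather than after `apply_chat_template`. For reference, a sketch of the full `transformers` example as it reads after this commit, assembled from the diff context; `import torch` comes from the first hunk header, while the two `apply_chat_template` arguments between the hunks are assumptions not shown in this diff:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"

# Load the tokenizer and the GPTQ-quantized model up front (their new position after this commit).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)

prompt = [
    {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
    {"role": "user", "content": "What's Deep Learning?"},
]

# Build the tokenized chat inputs and move them to the GPU.
inputs = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    add_generation_prompt=True,  # assumption: hidden context line between the hunks
    return_tensors="pt",         # assumption: hidden context line between the hunks
    return_dict=True,
).to("cuda")

outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```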
@@ -92,13 +90,18 @@ from auto_gptq import AutoGPTQForCausalLM
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoGPTQForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+    device_map="auto",
+)
+
 prompt = [
     {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
     {"role": "user", "content": "What's Deep Learning?"},
 ]
-
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-
 inputs = tokenizer.apply_chat_template(
     prompt,
     tokenize=True,

@@ -107,13 +110,6 @@ inputs = tokenizer.apply_chat_template(
     return_dict=True,
 ).to("cuda")
 
-model = AutoGPTQForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    low_cpu_mem_usage=True,
-    device_map="auto",
-)
-
 outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
 print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 ```
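The same reordering is applied to the AutoGPTQ variant. A sketch of that example after the commit, mirroring the loading call exactly as the diff shows it (`AutoGPTQForCausalLM.from_pretrained` with the same keyword arguments); the imports and the hidden `apply_chat_template` arguments are assumptions beyond what the hunks display:

```python
import torch
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer

model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"

# Tokenizer and quantized model are loaded before the prompt, matching the transformers example.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoGPTQForCausalLM.from_pretrained(  # call taken verbatim from the README diff
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)

prompt = [
    {"role": "system", "content": "You are a helpful assistant, that responds as a pirate."},
    {"role": "user", "content": "What's Deep Learning?"},
]

inputs = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    add_generation_prompt=True,  # assumption: hidden context line between the hunks
    return_tensors="pt",         # assumption: hidden context line between the hunks
    return_dict=True,
).to("cuda")

outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```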