Update README.md
README.md
CHANGED
````diff
@@ -106,12 +106,13 @@ We recommend users of this model to develop guardrails and to take appropriate p
 
 This requires a GPU with at least 27GB memory.
 
+### First, Load the Model
+
 ```python
 import torch
 from peft import PeftModel, PeftConfig
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
-# load the model
 peft_model_id = "dfurman/falcon-40b-chat-oasst1"
 config = PeftConfig.from_pretrained(peft_model_id)
 
````
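The base-model load itself sits between this hunk and the next one and is unchanged, so the diff elides it. For orientation: fitting a 40B Falcon checkpoint into the stated 27GB budget implies 4-bit quantization, which is why `BitsAndBytesConfig` joins the import line. Below is a plausible sketch of the full loading section, assuming the standard `transformers` quantization API; the quantization arguments, `device_map`, and `trust_remote_code` are assumptions, not copied from the README.

```python
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

peft_model_id = "dfurman/falcon-40b-chat-oasst1"
config = PeftConfig.from_pretrained(peft_model_id)

# 4-bit NF4 quantization is what lets a 40B model fit in roughly 27GB (assumed).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the frozen base model, then attach the LoRA adapter on top.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,  # Falcon shipped custom modeling code at release (assumed flag)
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(model, peft_model_id)
```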
````diff
@@ -134,8 +135,11 @@ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 tokenizer.pad_token = tokenizer.eos_token
 
 model = PeftModel.from_pretrained(model, peft_model_id)
+```
 
-
+### Next, Run the Model
+
+```python
 prompt = """<human>: My name is Daniel. Write a short email to my closest friends inviting them to come to my home on Friday for a dinner party, I will make the food but tell them to BYOB.
 <bot>:"""
 
````
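The diff likewise skips the tokenization step between the prompt and the `generate` call; the next hunk's context line, `batch = batch.to('cuda:0')`, shows its result. A minimal sketch of that unchanged step, assuming a plain tokenizer call:

```python
# Tokenize the chat-formatted prompt and move the tensors to the GPU.
batch = tokenizer(prompt, return_tensors="pt")
batch = batch.to("cuda:0")
```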
````diff
@@ -149,13 +153,17 @@ batch = batch.to('cuda:0')
 
 with torch.cuda.amp.autocast():
     output_tokens = model.generate(
-
+        inputs=batch.input_ids,
         max_new_tokens=200,
-
-
+        do_sample=False,
+        use_cache=True,
+        temperature=1.0,
+        top_k=50,
+        top_p=1.0,
         num_return_sequences=1,
         pad_token_id=tokenizer.eos_token_id,
         eos_token_id=tokenizer.eos_token_id,
+        bos_token_id=tokenizer.eos_token_id,
     )
 
 generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
````
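Two notes on the filled-in `generate` call. Since `do_sample=False` selects greedy decoding, the added `temperature`, `top_k`, and `top_p` values have no effect on the output (recent `transformers` releases warn when sampling parameters accompany `do_sample=False`). Also, `output_tokens[0]` holds the prompt followed by the completion, so decoding only the reply takes one extra slice; a sketch, not part of the README:

```python
# Keep only the tokens generated after the prompt before decoding.
new_tokens = output_tokens[0][batch.input_ids.shape[1]:]
reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
```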