Update README.md
Browse files
README.md
CHANGED
@@ -152,6 +152,26 @@ So, the sum of 100, 520, and 60 is 680.
|
|
152 |
"""
|
153 |
```
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
### INT4 Inference with Transformers and Intel Extension for Transformers
|
156 |
```python
|
157 |
from transformers import AutoTokenizer, TextStreamer
|
|
|
152 |
"""
|
153 |
```
|
154 |
|
155 |
+
### BF16 Inference with Intel Extension for Transformers and Intel Extension for PyTorch
|
156 |
+
```python
|
157 |
+
from transformers import AutoTokenizer, TextStreamer
|
158 |
+
import torch
|
159 |
+
from intel_extension_for_transformers.transformers import AutoModelForCausalLM
|
160 |
+
import intel_extension_for_pytorch as ipex
|
161 |
+
|
162 |
+
model_name = "Intel/neural-chat-7b-v3-3"
|
163 |
+
prompt = "Once upon a time, there existed a little girl,"
|
164 |
+
|
165 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
166 |
+
inputs = tokenizer(prompt, return_tensors="pt").input_ids
|
167 |
+
streamer = TextStreamer(tokenizer)
|
168 |
+
|
169 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
|
170 |
+
model = ipex.optimize(model.eval(), dtype=torch.bfloat16, inplace=True, level="O1", auto_kernel_selection=True)
|
171 |
+
|
172 |
+
outputs = model.generate(inputs, streamer=streamer, max_new_tokens=300)
|
173 |
+
```
|
174 |
+
|
175 |
### INT4 Inference with Transformers and Intel Extension for Transformers
|
176 |
```python
|
177 |
from transformers import AutoTokenizer, TextStreamer
|