Commit ec40b36 by abhi-mosaic
Parent(s): dda3b34
Update README.md
README.md
CHANGED
@@ -89,7 +89,8 @@ import transformers
 name = 'mosaicml/mpt-30b'

 config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
-config.attn_config['attn_impl'] = '
+config.attn_config['attn_impl'] = 'triton' # change this to use triton-based FlashAttention
+config.init_device = 'cuda:0' # For fast initialization directly on GPU!

 model = transformers.AutoModelForCausalLM.from_pretrained(
   name,
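
For context, a minimal sketch of the complete loading snippet this hunk modifies. The diff cuts off after `name,`, so the remaining `from_pretrained` arguments shown here (`config=config`, `torch_dtype=torch.bfloat16`, `trust_remote_code=True`) are assumptions based on the standard transformers API, not lines from this diff:

import torch
import transformers

name = 'mosaicml/mpt-30b'

config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
config.attn_config['attn_impl'] = 'triton'  # switch to triton-based FlashAttention
config.init_device = 'cuda:0'               # initialize weights directly on GPU

# The arguments below `name` are an assumption; the hunk ends at `name,`.
model = transformers.AutoModelForCausalLM.from_pretrained(
    name,
    config=config,
    torch_dtype=torch.bfloat16,  # load weights in bfloat16 to reduce memory
    trust_remote_code=True,      # required for MPT's custom model code
)

As the added comments note, 'triton' swaps in the triton-based FlashAttention kernel, which needs a CUDA device, and init_device='cuda:0' skips CPU initialization by materializing weights on the GPU.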