tuandunghcmut committed · Commit 4d107c7 · Parent: 82dd649
Update app.py
app.py CHANGED

@@ -10,7 +10,7 @@ import numpy as np
 import os
 
 HF_TOKEN = os.environ['HF_TOKEN']
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 # models = {
 #     "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
@@ -38,7 +38,7 @@ models = {
         trust_remote_code=True,
         token=HF_TOKEN,
         torch_dtype=torch.bfloat16,
-        attn_implementation="flash_attention_2"
+        # attn_implementation="flash_attention_2"
     ).cuda().eval()
 
 }
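Net effect of the commit: the Space no longer installs flash-attn at startup, and with attn_implementation commented out the model loads with transformers' default attention backend instead of flash_attention_2. A minimal sketch of the resulting load call, assuming the model id from the commented-out example above (the Space itself fills a models dict with its own entries):

import os
import torch
from transformers import AutoModelForCausalLM

HF_TOKEN = os.environ['HF_TOKEN']

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",  # illustrative id, taken from the commented-out example
    trust_remote_code=True,
    token=HF_TOKEN,
    torch_dtype=torch.bfloat16,
    # attn_implementation left unset: transformers falls back to its default
    # backend (SDPA or eager, depending on version), so the flash-attn wheel
    # no longer needs to be installed when the Space boots.
).cuda().eval()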