tuandunghcmut committed on
Commit
4d107c7
1 Parent(s): 82dd649

Update app.py

Files changed (1): app.py (+2 -2)
app.py CHANGED
@@ -10,7 +10,7 @@ import numpy as np
 import os
 
 HF_TOKEN = os.environ['HF_TOKEN']
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+# subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 # models = {
 #     "Qwen/Qwen2-VL-2B-Instruct": AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype="auto", _attn_implementation="flash_attention_2").cuda().eval()
@@ -38,7 +38,7 @@ models = {
         trust_remote_code=True,
         token=HF_TOKEN,
         torch_dtype=torch.bfloat16,
-        attn_implementation="flash_attention_2"
+        # attn_implementation="flash_attention_2"
     ).cuda().eval()
 
 }
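
The commit disables the runtime flash-attn install and the attn_implementation override, so the model is loaded with the transformers default attention backend instead of FlashAttention 2. Below is a minimal sketch of the resulting loading call; the model id "Qwen/Qwen2-VL-2B-Instruct" is taken from the commented-out block above and is an assumption, since the id actually used on the changed lines falls outside the hunk.

# Sketch of the post-change loading path (model id assumed, see note above).
import os
import torch
from transformers import AutoModelForCausalLM

HF_TOKEN = os.environ["HF_TOKEN"]

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",  # assumed model id for illustration
    trust_remote_code=True,
    token=HF_TOKEN,
    torch_dtype=torch.bfloat16,
    # attn_implementation="flash_attention_2"  # disabled by this commit;
    # without it, transformers picks its default attention implementation
).cuda().eval()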