Update app.py
app.py CHANGED
@@ -7,13 +7,13 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
 
-import subprocess
+#import subprocess
 # Install flash attention, skipping CUDA build if necessary
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
+#subprocess.run(
+#    "pip install flash-attn --no-build-isolation",
+#    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+#    shell=True,
+#)
 
 MAX_MAX_NEW_TOKENS = 1024
 DEFAULT_MAX_NEW_TOKENS = 512
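The lines commented out above are a common Spaces trick: installing flash-attn at startup with FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE so pip does not try to compile the CUDA extension during the install. If the install is ever reinstated, a slightly more defensive variant might look like the sketch below; the flash_attn presence check and the os.environ merge are my additions, not code from this Space.

    import importlib.util
    import os
    import subprocess

    # Only attempt the install if flash-attn is actually missing.
    if importlib.util.find_spec("flash_attn") is None:
        # Merging os.environ keeps PATH visible to the shell; the removed
        # lines passed a single-key dict, which replaces the entire
        # environment the pip subprocess sees.
        subprocess.run(
            "pip install flash-attn --no-build-isolation",
            env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
            shell=True,
            check=True,  # fail loudly instead of silently continuing
        )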
@@ -33,7 +33,8 @@ if not torch.cuda.is_available():
 
 
 if torch.cuda.is_available():
-    model_id = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
+    #model_id = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True,trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.padding_side = 'right'
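The retained load call still passes load_in_4bit=True straight to from_pretrained. Recent transformers releases deprecate that bare kwarg in favor of an explicit BitsAndBytesConfig, and trust_remote_code, which the DeepSeek-V2 architecture needed for its custom modeling code, is unnecessary for Llama-3, which transformers supports natively. Below is a minimal sketch of the equivalent modern call; the bfloat16 compute dtype is my assumption, not a setting taken from this Space.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    # Gated repo: requires an accepted license and an HF access token.
    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

    # 4-bit quantization via bitsandbytes; bfloat16 compute is a common
    # choice on Ampere-or-newer GPUs (assumption, not from the Space).
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        quantization_config=quant_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.padding_side = "right"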
|