added model path
app.py CHANGED
@@ -13,12 +13,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 from transformers import BitsAndBytesConfig
 from tqdm import tqdm
 import os
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit = True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-)
+
 
 USE_PAST_KEY = True
 import gc
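For reference, the block removed above is a standard bitsandbytes 4-bit setup. A minimal sketch of what it configures, as it would be re-enabled on a GPU machine; the opening line of the call is truncated in the capture, so the variable name quantization_config is an assumption:

# Sketch only: the 4-bit setup this commit removes, re-enabled on a GPU box.
# bitsandbytes 4-bit kernels require CUDA, which a CPU-only Space lacks.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store weights in 4 bits
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # do the matmuls in bfloat16
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
)
model = AutoModelForCausalLM.from_pretrained(
    "Pra-tham/quant_deepseekmath",
    quantization_config=quantization_config,
    device_map="auto",                      # assumption: shard across available GPUs
    trust_remote_code=True,
)

Dropping this block is consistent with the rest of the commit, which pins everything to CPU below.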
@@ -40,40 +35,35 @@ MODEL_PATH = "Pra-tham/quant_deepseekmath"
 
 # DEEP = True
 import torch
-
+
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+import transformers
+
 
-config = AutoConfig.from_pretrained(MODEL_PATH)
-config.gradient_checkpointing = True
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit = True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-)
+
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH,
-    device_map="
+    device_map="cpu",
     torch_dtype="auto",
     trust_remote_code=True,
-    quantization_config=quantization_config,
-    config=config
+
 )
 pipeline = transformers.pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
     torch_dtype='auto',
-    device_map=
+    device_map='cpu',
 )
 from transformers import StoppingCriteriaList
 
 class StoppingCriteriaSub(StoppingCriteria):
     def __init__(self, stops = [], encounters=1):
         super().__init__()
-        self.stops = [stop.to("cuda") for stop in stops]
+        # self.stops = [stop.to("cuda") for stop in stops]
+        self.stops = stops
 
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
         for stop in self.stops:
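The hunk shows the stopping-criteria class only up to "for stop in self.stops:"; the rest of __call__ sits outside the diff context. A self-contained sketch of the usual stop-sequence pattern this class implements on CPU; the loop body and the usage lines are assumptions, not part of the commit:

# Sketch: completing the truncated StoppingCriteriaSub. Everything past
# "for stop in self.stops:" is assumed from the common stop-sequence pattern.
import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops=None, encounters=1):
        super().__init__()
        # Stop tensors stay on CPU now that the model runs with device_map="cpu".
        self.stops = stops or []

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> bool:
        # Halt generation once any stop sequence matches the tail of the output ids.
        for stop in self.stops:
            if input_ids.shape[1] >= len(stop) and torch.all(stop == input_ids[0][-len(stop):]):
                return True
        return False

# Hypothetical usage, e.g. stopping on "```output" as code-interpreter prompts do:
# stop_ids = [tokenizer("```output", return_tensors="pt", add_special_tokens=False)["input_ids"][0]]
# stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_ids)])
# out = pipeline(prompt, max_new_tokens=512, stopping_criteria=stopping_criteria)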
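Taken together, the commit's end state is a plain full-precision, CPU-only load path (the original values on the removed device_map lines are cut off in the capture). A condensed sketch of the setup as it stands after this change:

# Sketch: the model/pipeline setup after this commit --
# no bitsandbytes quantization, everything pinned to CPU.
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "Pra-tham/quant_deepseekmath"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="cpu",       # run entirely on CPU
    torch_dtype="auto",     # take the dtype from the checkpoint
    trust_remote_code=True,
)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype="auto",
    device_map="cpu",
)

Dropping the AutoConfig with gradient_checkpointing = True fits the same direction: gradient checkpointing only pays off during training, so it has no role in an inference-only Space.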