Pra-tham committed
Commit f998c2d
1 Parent(s): 3ad2c61

added model path

Files changed (1): app.py (+10, -20)
app.py CHANGED
@@ -13,12 +13,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 from transformers import BitsAndBytesConfig
 from tqdm import tqdm
 import os
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit = True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-)
+
 
 USE_PAST_KEY = True
 import gc
@@ -40,40 +35,35 @@ MODEL_PATH = "Pra-tham/quant_deepseekmath"
 
 # DEEP = True
 import torch
-from transformers import BitsAndBytesConfig
+
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+import transformers
+
 
-config = AutoConfig.from_pretrained(MODEL_PATH)
-config.gradient_checkpointing = True
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit = True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-)
+
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH,
-    device_map="sequential",
+    device_map="cpu",
     torch_dtype="auto",
     trust_remote_code=True,
-    quantization_config=quantization_config,
-    config=config
+
 )
 pipeline = transformers.pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
     torch_dtype='auto',
-    device_map=device_map,
+    device_map='cpu',
 )
 from transformers import StoppingCriteriaList
 
 class StoppingCriteriaSub(StoppingCriteria):
     def __init__(self, stops = [], encounters=1):
         super().__init__()
-        self.stops = [stop.to("cuda") for stop in stops]
+        # self.stops = [stop.to("cuda") for stop in stops]
+        self.stops = stops
 
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
         for stop in self.stops:
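
Despite the commit message, the substance of the change is a move from 4-bit quantized GPU inference to plain CPU inference: the BitsAndBytesConfig blocks and the gradient-checkpointing AutoConfig are deleted, and both the model and the text-generation pipeline are pinned to the CPU. A minimal, self-contained sketch of the loading path as it stands after this commit (CPU-only; assumes the Hub checkpoint Pra-tham/quant_deepseekmath is reachable, and the final prompt is illustrative, not from the commit):

    # Post-commit loading path: plain CPU load, no quantization.
    import torch
    import transformers
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_PATH = "Pra-tham/quant_deepseekmath"

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="cpu",        # was device_map="sequential" before the commit
        torch_dtype="auto",
        trust_remote_code=True,
    )
    pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype="auto",
        device_map="cpu",
    )
    # Illustrative smoke test, not part of the commit:
    print(pipeline("1 + 1 =", max_new_tokens=8)[0]["generated_text"])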
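For reference, the deleted quantization block loaded the checkpoint as 4-bit NF4 weights with double quantization and bfloat16 compute, which presupposes a CUDA device with bitsandbytes installed. Reverting to the GPU path would mean passing the config back in, roughly as the removed lines did (a sketch reassembled from the deleted lines, not part of this commit):

    # Pre-commit 4-bit path (needs a CUDA GPU with bitsandbytes installed).
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    MODEL_PATH = "Pra-tham/quant_deepseekmath"

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # store weights in 4 bits
        bnb_4bit_quant_type="nf4",              # NormalFloat4 weight format
        bnb_4bit_compute_dtype=torch.bfloat16,  # matmuls run in bfloat16
        bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        device_map="sequential",
        torch_dtype="auto",
        trust_remote_code=True,
        quantization_config=quantization_config,
    )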
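The StoppingCriteriaSub edit follows the same CPU move: stop sequences are kept on CPU instead of being copied with .to("cuda"). The diff truncates the class before the body of __call__; a hedged completion of how such a criterion is typically wired into generation (the tail-matching logic and the stop_words list below are illustrative, not from the commit):

    import torch
    from transformers import StoppingCriteria, StoppingCriteriaList

    class StoppingCriteriaSub(StoppingCriteria):
        def __init__(self, stops=[], encounters=1):
            super().__init__()
            # Post-commit: stop-token tensors stay on CPU.
            self.stops = stops

        def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> bool:
            # Illustrative body: stop once any stop sequence matches the
            # tail of the generated ids (the diff cuts off before this point).
            for stop in self.stops:
                if input_ids.shape[1] >= len(stop) and torch.all(input_ids[0, -len(stop):] == stop):
                    return True
            return False

    # Hypothetical usage, with the tokenizer loaded as above:
    stop_words = ["```output", "```python"]
    stops = [tokenizer(w, add_special_tokens=False, return_tensors="pt").input_ids.squeeze(0)
             for w in stop_words]
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stops)])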