iamkhadke committed
Commit b34d408 • 1 Parent(s): e966be2

Update app.py

Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -17,7 +17,7 @@ quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
 tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map="auto", load_in_8bit=True, torch_dtype=torch.float16 )
 m = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map= "auto", quantization_config=quantization_config,
                                          offload_folder="./")
-generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
+generator = pipeline('text-generation', model=m, tokenizer=tok, device=1)
 print(f"Sucessfully loaded the model to the memory")
 
 start_message = """<|SYSTEM|># StableAssistant
@@ -51,7 +51,7 @@ def chat(curr_system_message, history):
                          for item in history])
 
     # Tokenize the messages string
-    model_inputs = tok([messages], return_tensors="pt").to("cuda")
+    model_inputs = tok([messages], return_tensors="pt")
     streamer = TextIteratorStreamer(
         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
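
Taken together, the two changed lines move the text-generation pipeline from GPU index 0 to GPU index 1 and stop force-moving the tokenized inputs to "cuda". Below is a minimal sketch of the input-placement pattern, assuming the same model and tokenizer as app.py; the helper name encode_for_chat is hypothetical and not part of the commit.

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Allow int8 weights with fp32 CPU offload, as in app.py.
quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)

tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b")
m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-tuned-alpha-3b",
    device_map="auto",                        # let accelerate place the layers
    quantization_config=quantization_config,
    offload_folder="./",
)

def encode_for_chat(messages: str):
    # Hypothetical helper: tokenize on CPU, then follow the model's own
    # placement rather than hard-coding .to("cuda"), which can fail when
    # layers are offloaded to CPU or when no GPU is visible.
    inputs = tok([messages], return_tensors="pt")
    return inputs.to(m.device)

With offloading enabled, leaving the inputs on CPU (as the committed line does) also works, since accelerate's hooks move tensors to each layer's execution device during the forward pass. Note that passing an explicit device= to pipeline() for a model already dispatched with device_map="auto" can conflict with accelerate's placement in some transformers versions.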