iamkhadke committed
Commit b34d408 • 1 Parent(s): e966be2

Update app.py

Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -17,7 +17,7 @@ quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
 tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map="auto", load_in_8bit=True, torch_dtype=torch.float16 )
 m = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map= "auto", quantization_config=quantization_config,
                                          offload_folder="./")
-generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
+generator = pipeline('text-generation', model=m, tokenizer=tok, device=1)
 print(f"Sucessfully loaded the model to the memory")
 
 start_message = """<|SYSTEM|># StableAssistant
@@ -51,7 +51,7 @@ def chat(curr_system_message, history):
                          for item in history])
 
     # Tokenize the messages string
-    model_inputs = tok([messages], return_tensors="pt").to("cuda")
+    model_inputs = tok([messages], return_tensors="pt")
     streamer = TextIteratorStreamer(
         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
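
Taken together, the two changed lines move the text-generation pipeline from GPU index 0 to GPU index 1 and stop force-moving the tokenized inputs to "cuda". Below is a minimal sketch of the input-placement pattern, assuming the same model and tokenizer as app.py; the helper name encode_for_chat is hypothetical and not part of the commit.

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Allow int8 weights with fp32 CPU offload, as in app.py.
quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)

tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b")
m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-tuned-alpha-3b",
    device_map="auto",                        # let accelerate place the layers
    quantization_config=quantization_config,
    offload_folder="./",
)

def encode_for_chat(messages: str):
    # Hypothetical helper: tokenize on CPU, then follow the model's own
    # placement rather than hard-coding .to("cuda"), which can fail when
    # layers are offloaded to CPU or when no GPU is visible.
    inputs = tok([messages], return_tensors="pt")
    return inputs.to(m.device)

With offloading enabled, leaving the inputs on CPU (as the committed line does) also works, since accelerate's hooks move tensors to each layer's execution device during the forward pass. Note that passing an explicit device= to pipeline() for a model already dispatched with device_map="auto" can conflict with accelerate's placement in some transformers versions.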