Space status: Runtime error
Commit: Update app.py
Changed file: app.py
@@ -17,7 +17,7 @@ quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)
|
|
17 |
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map="auto", load_in_8bit=True, torch_dtype=torch.float16 )
|
18 |
m = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map= "auto", quantization_config=quantization_config,
|
19 |
offload_folder="./")
|
20 |
-
generator = pipeline('text-generation', model=m, tokenizer=tok, device=
|
21 |
print(f"Sucessfully loaded the model to the memory")
|
22 |
|
23 |
start_message = """<|SYSTEM|># StableAssistant
|
@@ -51,7 +51,7 @@ def chat(curr_system_message, history):
|
|
51 |
for item in history])
|
52 |
|
53 |
# Tokenize the messages string
|
54 |
-
model_inputs = tok([messages], return_tensors="pt")
|
55 |
streamer = TextIteratorStreamer(
|
56 |
tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
|
57 |
generate_kwargs = dict(
|
|
|
17 |
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map="auto", load_in_8bit=True, torch_dtype=torch.float16 )
|
18 |
m = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-tuned-alpha-3b", device_map= "auto", quantization_config=quantization_config,
|
19 |
offload_folder="./")
|
20 |
+
generator = pipeline('text-generation', model=m, tokenizer=tok, device=1)
|
21 |
print(f"Sucessfully loaded the model to the memory")
|
22 |
|
23 |
start_message = """<|SYSTEM|># StableAssistant
|
|
|
51 |
for item in history])
|
52 |
|
53 |
# Tokenize the messages string
|
54 |
+
model_inputs = tok([messages], return_tensors="pt")
|
55 |
streamer = TextIteratorStreamer(
|
56 |
tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
|
57 |
generate_kwargs = dict(
|