Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -9,6 +9,8 @@ from threading import Thread

print(f"Starting to load the model to memory")
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
+ tok.padding_side = "left"
+ tok.pad_token_id = tok.eos_token_id
# using CUDA for an optimal experience
slot="<slot_for_user_input_design_by_xm>"
chat=[{"role": "user", "content": slot}]

@@ -16,7 +18,7 @@ sample_input = tok.apply_chat_template(chat, tokenize=False, add_generation_prom
input_start_id=sample_input.find(slot)
prefix=sample_input[:input_start_id]
suffix=sample_input[input_start_id+len(slot):]
- print(tok.
+ print(tok.encode(prefix,return_tensors="pt")[0])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True)
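The two added lines configure the tokenizer for batched generation with a decoder-only model: left padding keeps pad tokens away from the end of each prompt, where generation continues, and since this checkpoint defines no pad token, EOS is reused. (The committed line referenced an undefined `tokenizer` variable; it is corrected to `tok` above.) A minimal sketch of the effect, not part of the commit; the prompts and max_new_tokens are illustrative:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
tok.padding_side = "left"            # pad on the left so new tokens continue from real text
tok.pad_token_id = tok.eos_token_id  # the checkpoint ships no pad token, so reuse EOS

m = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)

# Unequal-length prompts get padded to one tensor; with right padding the pad
# tokens would sit between each prompt and its continuation.
batch = tok(["Hi", "A much longer prompt about StableLM"], return_tensors="pt", padding=True)
out = m.generate(**batch, max_new_tokens=20, pad_token_id=tok.pad_token_id)
print(tok.batch_decode(out, skip_special_tokens=True))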
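The surrounding code that the new print statement inspects renders the chat template once around a placeholder slot, then splices real user input between the two halves so apply_chat_template need not run per request. A hedged sketch of that flow, assuming the truncated hunk context ends in add_generation_prompt=True; build_prompt is a hypothetical helper, not a function in app.py:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)

# Render the template once around a sentinel string that will never tokenize
# into template text.
slot = "<slot_for_user_input_design_by_xm>"
chat = [{"role": "user", "content": slot}]
sample_input = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

input_start_id = sample_input.find(slot)
prefix = sample_input[:input_start_id]              # template text before the user turn
suffix = sample_input[input_start_id + len(slot):]  # generation prompt after the user turn

def build_prompt(user_text):
    # Hypothetical helper: app.py splices the strings inline.
    return prefix + user_text + suffix

print(build_prompt("What is Zephyr?"))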
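The first hunk header shows `from threading import Thread`, which suggests the app generates on a worker thread and streams tokens back. The sketch below is an assumed reconstruction of that pattern with TextIteratorStreamer, combined with the device- and dtype-conditional loading the diff does show; the prompt and max_new_tokens are illustrative:

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
m = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-2-zephyr-1_6b",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # fp16 only where CUDA can run it
    trust_remote_code=True,
).to(device)

inputs = tok("Tell me about StableLM 2 Zephyr.", return_tensors="pt").to(device)
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
# Run generate on a worker thread; the main thread consumes tokens as they arrive.
Thread(target=m.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=64)).start()
for piece in streamer:
    print(piece, end="", flush=True)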