gregH committed
Commit 04094cf
Parent: 69a6085

Update app.py

Files changed (1): app.py +3 -1
app.py CHANGED
@@ -9,6 +9,8 @@ from threading import Thread
 
 print(f"Starting to load the model to memory")
 tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
+tok.padding_side = "left"
+tok.pad_token_id = tok.eos_token_id
 # using CUDA for an optimal experience
 slot="<slot_for_user_input_design_by_xm>"
 chat=[{"role": "user", "content": slot}]
@@ -16,7 +18,7 @@ sample_input = tok.apply_chat_template(chat, tokenize=False, add_generation_prom
 input_start_id=sample_input.find(slot)
 prefix=sample_input[:input_start_id]
 suffix=sample_input[input_start_id+len(slot):]
-print(tok.decode(tok.encode(prefix,return_tensors="pt")[0]))
+print(tok.encode(prefix,return_tensors="pt")[0])
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 m = AutoModelForCausalLM.from_pretrained(
     "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True)