banghua amitness committed on
Commit
12875de
1 Parent(s): 5a58bd5

Fix issues in sample code: Invalid reward_tokenizer and issue in returning scores (#1)

Browse files

- Fix issues in sample code: Invalid reward_tokenizer and issue in returning scores (f9c3ba8e1bdeffb59a08db30119e009afa6adbd9)


Co-authored-by: Amit Chaudhary <[email protected]>

Files changed (1) hide show
  1. README.md +1 -2
README.md CHANGED
@@ -92,11 +92,10 @@ class GPTRewardModel(nn.Module):
92
  c_ind = c_inds[0].item() if len(c_inds) > 0 else input_ids.shape[1]
93
  scores.append(rewards[i, c_ind - 1])
94
  return scores
95
- return scores
96
 
97
  ## Load the model and tokenizer
98
 
99
- reward_model = GPTRewardModel("meta-llama/Llama-2-7b-chat-hf", reward_tokenizer.eos_token_id)
100
  reward_tokenizer = reward_model.tokenizer
101
  reward_tokenizer.truncation_side = "left"
102
 
 
92
  c_ind = c_inds[0].item() if len(c_inds) > 0 else input_ids.shape[1]
93
  scores.append(rewards[i, c_ind - 1])
94
  return scores
 
95
 
96
  ## Load the model and tokenizer
97
 
98
+ reward_model = GPTRewardModel("meta-llama/Llama-2-7b-chat-hf")
99
  reward_tokenizer = reward_model.tokenizer
100
  reward_tokenizer.truncation_side = "left"
101