RLHFlow
/

ArmoRM-Llama3-8B-v0.1

Text Classification

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

gabrielmbmb HF staff committed on Aug 1

Commit

3f9f573

•

1 Parent(s): 89047a4

Update device

Files changed (1) hide show

modeling_custom.py +3 -3

modeling_custom.py CHANGED Viewed

@@ -140,11 +140,11 @@ class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
                 # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
                 sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
                 sequence_lengths = sequence_lengths % input_ids.shape[-1]
-                sequence_lengths = sequence_lengths.to("cuda")
             else:
                 sequence_lengths = -1
-        dummy_iterator = torch.arange(batch_size, device=tokens_hidden_states.device)
         hidden_states = tokens_hidden_states[dummy_iterator, sequence_lengths]
         assert hidden_states.shape == (batch_size, self.config.hidden_size)
         rewards = self.regression_layer(hidden_states)
@@ -163,4 +163,4 @@ class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
             gating_output=gating_output,
             score=score,
             logits=score,
-        )

                 # if no pad token found, use modulo instead of reverse indexing for ONNX compatibility
                 sequence_lengths = torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1
                 sequence_lengths = sequence_lengths % input_ids.shape[-1]
+                sequence_lengths = sequence_lengths.to(self.device)
             else:
                 sequence_lengths = -1
+        dummy_iterator = torch.arange(batch_size, device=self.device)
         hidden_states = tokens_hidden_states[dummy_iterator, sequence_lengths]
         assert hidden_states.shape == (batch_size, self.config.hidden_size)
         rewards = self.regression_layer(hidden_states)
             gating_output=gating_output,
             score=score,
             logits=score,
+        )