mwirth-epo committed
Commit: 8993bb8
Parent: f80aaa3

update positional_embedding.py


Relates to the bf16 issue for query_states and key_states.
Applies the fix from https://huggingface.co/microsoft/Phi-3-small-8k-instruct/commit/f196467b67c13127747a03c142e09aa6841447b8 to this model as well.

Files changed (1)
  positional_embedding.py  +3 -3
positional_embedding.py CHANGED
@@ -269,10 +269,10 @@ class RotaryEmbedding(torch.nn.Module):
         return (
             apply_rotary_pos_emb(
                 q, cos_cached[seqlen_offset:seq_len], sin_cached[seqlen_offset:seq_len], seq_dimension=seq_dimension
-            ),
+            ).to(q.dtype),
             apply_rotary_pos_emb(
                 k, cos_cached[seqlen_offset:seq_len], sin_cached[seqlen_offset:seq_len], seq_dimension=seq_dimension
-            ),
+            ).to(k.dtype),
         )
 
     @classmethod
@@ -285,4 +285,4 @@ class RotaryEmbedding(torch.nn.Module):
         )
         if config.rope_scaling is not None:
             kwargs["longrope_config"] = LongRopeConfig.from_dict(config.rope_scaling)
-        return cls(**kwargs)
+        return cls(**kwargs)
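
For context on why the cast in the diff is needed: the cos/sin caches are typically kept in float32, so PyTorch's type promotion silently upcasts bf16 query/key tensors to float32 when the rotary embedding is applied, and downstream attention code that expects query_states and key_states in the original dtype can then fail. The snippet below is a minimal, self-contained sketch of that promotion and of the .to(q.dtype) cast; apply_rotary_pos_emb_sketch is a toy stand-in for illustration, not this repository's apply_rotary_pos_emb.

import torch

def apply_rotary_pos_emb_sketch(x, cos, sin):
    # Toy stand-in for a rotary-embedding helper (hypothetical, illustration only):
    # mixing a bf16 tensor with fp32 cos/sin promotes the result to fp32.
    x1, x2 = x.chunk(2, dim=-1)
    rotated = torch.cat((-x2, x1), dim=-1)
    return x * cos + rotated * sin

# bf16 query tensor with fp32 cos/sin caches: the setup that triggers the issue.
q = torch.randn(1, 8, 4, 64, dtype=torch.bfloat16)
cos = torch.randn(4, 64, dtype=torch.float32)
sin = torch.randn(4, 64, dtype=torch.float32)

out = apply_rotary_pos_emb_sketch(q, cos, sin)
print(out.dtype)              # torch.float32 -- dtype silently promoted
print(out.to(q.dtype).dtype)  # torch.bfloat16 -- the cast restores the input dtype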