Update modeling_Llamoe.py
Browse files
- modeling_Llamoe.py +6 -1
modeling_Llamoe.py
CHANGED
@@ -561,10 +561,15 @@ class LlamoeSdpaAttention(LlamoeAttention):
|
|
561 |
|
562 |
bsz, q_len, _ = hidden_states.size()
|
563 |
|
|
|
|
|
|
|
564 |
query_states = self.q_proj(hidden_states)
|
565 |
key_states = self.k_proj(hidden_states)
|
566 |
value_states = self.v_proj(hidden_states)
|
567 |
-
|
|
|
|
|
568 |
query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
|
569 |
key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
|
570 |
value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
|
|
|
561 |
|
562 |
bsz, q_len, _ = hidden_states.size()
|
563 |
|
564 |
+
print("bsz:",bsz.shape)
|
565 |
+
print("q_len:",q_len.shape)
|
566 |
+
print("hidden_states:",hidden_states.shape)
|
567 |
query_states = self.q_proj(hidden_states)
|
568 |
key_states = self.k_proj(hidden_states)
|
569 |
value_states = self.v_proj(hidden_states)
|
570 |
+
print("query_states:",query_states.shape)
|
571 |
+
print("key_states:",key_states.shape)
|
572 |
+
print("value_states:",value_states.shape)
|
573 |
query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
|
574 |
key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
|
575 |
value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
|