damerajee committed on
Commit
6ffcb74
1 Parent(s): 69ad620

Update modeling_Llamoe.py

Browse files
Files changed (1) hide show
  1. modeling_Llamoe.py +6 -1
modeling_Llamoe.py CHANGED
@@ -561,10 +561,15 @@ class LlamoeSdpaAttention(LlamoeAttention):
561
 
562
  bsz, q_len, _ = hidden_states.size()
563
 
 
 
 
564
  query_states = self.q_proj(hidden_states)
565
  key_states = self.k_proj(hidden_states)
566
  value_states = self.v_proj(hidden_states)
567
-
 
 
568
  query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
569
  key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
570
  value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
 
561
 
562
  bsz, q_len, _ = hidden_states.size()
563
 
564
+ print("bsz:",bsz.shape)
565
+ print("q_len:",q_len.shape)
566
+ print("hidden_states:",hidden_states.shape)
567
  query_states = self.q_proj(hidden_states)
568
  key_states = self.k_proj(hidden_states)
569
  value_states = self.v_proj(hidden_states)
570
+ print("query_states:",query_states.shape)
571
+ print("key_states:",key_states.shape)
572
+ print("value_states:",value_states.shape)
573
  query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
574
  key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
575
  value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)