yuyijiong
/

Qwen-14b-chat-yarn-32k

Text Generation

Model card Files and versions Community

yuyijiong committed on Jan 8

Commit

815cfe7

•

1 Parent(s): a445828

Update modeling_qwen_yarn.py

Files changed (1) hide show

modeling_qwen_yarn.py +4 -0

modeling_qwen_yarn.py CHANGED Viewed

@@ -1156,6 +1156,10 @@ class QWenLMHeadModel(QWenPreTrainedModel):
             output = (lm_logits,) + transformer_outputs[1:]
             return ((loss,) + output) if loss is not None else output
         return CausalLMOutputWithPast(
             loss=loss,
             logits=lm_logits,

             output = (lm_logits,) + transformer_outputs[1:]
             return ((loss,) + output) if loss is not None else output
+        # Save GPU memory during training
+        # if self.training:
+            # lm_logits=None
         return CausalLMOutputWithPast(
             loss=loss,
             logits=lm_logits,