THUDM
/

chatglm3-6b

@@ -14,6 +14,7 @@ from torch.nn import CrossEntropyLoss, LayerNorm, MSELoss, BCEWithLogitsLoss
 from torch.nn.utils import skip_init
 from typing import Optional, Tuple, Union, List, Callable, Dict, Any
 from copy import deepcopy
 from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
@@ -45,6 +46,9 @@ CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LIST = [
     # See all ChatGLM models at https://huggingface.co/models?filter=chatglm
 ]
 def default_init(cls, *args, **kwargs):
     """Build and return ``cls(*args, **kwargs)``, forwarding every argument unchanged."""
     instance = cls(*args, **kwargs)
     return instance
@@ -872,9 +876,19 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             standardize_cache_format: bool = False,
     ) -> Dict[str, Any]:
         # update past_key_values
-        model_kwargs["past_key_values"] = self._extract_past_from_model_output(
-            outputs, standardize_cache_format=standardize_cache_format
-        )
         # update attention mask
         if "attention_mask" in model_kwargs:

 from torch.nn.utils import skip_init
 from typing import Optional, Tuple, Union, List, Callable, Dict, Any
 from copy import deepcopy
+import transformers
 from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
     # See all ChatGLM models at https://huggingface.co/models?filter=chatglm
 ]
+# Compare (major, minor) as a tuple rather than the minor component alone, so
+# the flags stay correct across major releases: a minor-only check would
+# report e.g. 5.0 as older than 4.42 and select the legacy code path.
+_transformers_major_minor = tuple(int(part) for part in transformers.__version__.split(".")[:2])
+is_transformers_4_42_or_higher = _transformers_major_minor >= (4, 42)
+is_transformers_4_44_or_higher = _transformers_major_minor >= (4, 44)
 def default_init(cls, *args, **kwargs):
     """Build and return ``cls(*args, **kwargs)``, forwarding every argument unchanged."""
     instance = cls(*args, **kwargs)
     return instance
             standardize_cache_format: bool = False,
     ) -> Dict[str, Any]:
         # update past_key_values
+        if is_transformers_4_44_or_higher:
+            model_kwargs["past_key_values"] = self._extract_past_from_model_output(
+                outputs
+            )[1]
+        elif is_transformers_4_42_or_higher:
+            # update past_key_values
+            model_kwargs["past_key_values"] = self._extract_past_from_model_output(
+                outputs, standardize_cache_format=standardize_cache_format
+            )[1]
+        else:
+            model_kwargs["past_key_values"] = self._extract_past_from_model_output(
+                outputs, standardize_cache_format=standardize_cache_format
+            )
         # update attention mask
         if "attention_mask" in model_kwargs: