oweller2 committed
Commit 4b203f9
1 Parent(s): e0229bb
Files changed (2)
  1. config.json +2 -2
  2. modeling_flexbert.py +13 -3
config.json CHANGED
@@ -2,12 +2,12 @@
   "allow_embedding_resizing": true,
   "architectures": [
     "FlexBertModel",
-    "FlexBertForCasualLM"
+    "FlexBertForCausalLM"
   ],
   "auto_map": {
     "AutoConfig": "orionweller/test-flex-gpt--configuration_bert.FlexBertConfig",
     "AutoModel": "orionweller/test-flex-gpt--modeling_flexbert.FlexBertModel",
-    "AutoModelForCausalLM": "orionweller/test-flex-gpt--modeling_flexbert.FlexBertForCasualLM"
+    "AutoModelForCausalLM": "orionweller/test-flex-gpt--modeling_flexbert.FlexBertForCausalLM"
   },
   "attention_layer": "rope",
   "attention_probs_dropout_prob": 0.0,
modeling_flexbert.py CHANGED
@@ -1534,14 +1534,23 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         self._init_weights(reset_params=False)
 
     def _init_weights(self, module: Optional[nn.Module] = None, reset_params: Optional[bool] = None):
+        # Handle the XOR condition
         assert (module is None) != (reset_params is None), "arg module xor reset_params must be specified"
-        if module:
-            self._init_module_weights(module)
+
+        if module is not None:
+            # Add basic initialization for common module types
+            if isinstance(module, (nn.Linear, nn.Embedding)):
+                module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+                if isinstance(module, nn.Linear) and module.bias is not None:
+                    module.bias.data.zero_()
+            elif isinstance(module, nn.LayerNorm):
+                module.bias.data.zero_()
+                module.weight.data.fill_(1.0)
         else:
             assert isinstance(reset_params, bool)
             self.bert._init_weights(reset_params=reset_params)
             self.lm_head._init_weights(reset_params=reset_params)
-
+
         if not self.config.tie_word_embeddings:
             init_weights(self.config, self.decoder, self.config.hidden_size, type_of_module=ModuleType.final_out)
 
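This hunk keeps the original XOR contract (exactly one of module or reset_params must be supplied) but swaps the delegated self._init_module_weights(module) call for inline per-module initialization in the standard BERT style: normal-init for nn.Linear and nn.Embedding weights, zeroed biases, ones for nn.LayerNorm scales. A sketch of the two call paths, assuming model is an already-constructed FlexBertForCausalLM:

# Path 1: per-module init. nn.Module.apply passes each submodule in turn,
# which satisfies the XOR assert (module given, reset_params left as None).
model.apply(model._init_weights)

# Path 2: whole-model init, delegating to the submodules' own initializers.
model._init_weights(reset_params=True)

# Supplying both or neither argument violates the contract:
# model._init_weights()                  -> AssertionError
# model._init_weights(model.bert, True)  -> AssertionError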
 
 
@@ -1742,6 +1751,7 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
         params += _count_parameters(self.lm_head, trainable)
         return params
 
+FlexBertForCausalLM.register_for_auto_class("AutoModelForCausalLM")
 
 def init_model_from_pretrained(
     pretrained_model: FlexBertModel,
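register_for_auto_class is the standard transformers hook for custom-code models: calling it at module scope marks the class so that save_pretrained copies modeling_flexbert.py alongside the weights and writes the matching "auto_map" entry into config.json, which is exactly what the config change above relies on. A sketch of the effect; "./flexbert-ckpt" is a placeholder path and config is assumed to be a FlexBertConfig:

# Sketch of what the module-level registration buys.
import json

model = FlexBertForCausalLM(config)
model.save_pretrained("./flexbert-ckpt")  # also copies modeling_flexbert.py

with open("./flexbert-ckpt/config.json") as f:
    auto_map = json.load(f)["auto_map"]
print(auto_map["AutoModelForCausalLM"])
# expected: "modeling_flexbert.FlexBertForCausalLM"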