FrankC0st1e committed on
Commit • 8fa0de6 • Parent(s): 91a5e0a

change name to minicpm3

Browse files:
- config.json +7 -7
- configuration_minicpm.py +1 -1
- modeling_minicpm.py +4 -4
config.json CHANGED

@@ -4,14 +4,14 @@
     "MiniCPM3ForCausalLM"
   ],
   "auto_map": {
-    "AutoConfig": "configuration_minicpm.MiniCPMConfig",
-    "AutoModel": "modeling_minicpm.MiniCPMModel",
-    "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
-    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
-    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
+    "AutoConfig": "configuration_minicpm.MiniCPM3Config",
+    "AutoModel": "modeling_minicpm.MiniCPM3Model",
+    "AutoModelForCausalLM": "modeling_minicpm.MiniCPM3ForCausalLM",
+    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPM3ForCausalLM",
+    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPM3ForSequenceClassification"
   },
   "bos_token_id": 1,
-  "eos_token_id": 2,
+  "eos_token_id": [2, 73440],
   "hidden_act": "silu",
   "initializer_range": 0.1,
   "hidden_size": 2560,
@@ -32,7 +32,7 @@
     "original_max_position_embeddings": 32768
   },
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.
+  "transformers_version": "4.41.0",
   "use_cache": true,
   "vocab_size": 73448,
   "scale_emb": 12,
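The auto_map rename and the list-valued eos_token_id only matter at load and generation time. A minimal sketch of how a consumer sees these changes; the repo id "openbmb/MiniCPM3-4B" is an assumption, not stated in this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "openbmb/MiniCPM3-4B"  # assumed Hub id for this checkpoint

# trust_remote_code=True makes transformers follow config.json's auto_map and
# import modeling_minicpm.MiniCPM3ForCausalLM instead of a built-in class.
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True, torch_dtype="auto")

inputs = tokenizer("Hello", return_tensors="pt")
# eos_token_id is now [2, 73440]; generate() stops when either id is produced.
out = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(out[0], skip_special_tokens=True))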
configuration_minicpm.py CHANGED

@@ -28,7 +28,7 @@ logger = logging.get_logger(__name__)
 MINICPM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}


-class MiniCPMConfig(PretrainedConfig):
+class MiniCPM3Config(PretrainedConfig):
     r"""
     This is the configuration class to store the configuration of a [`MiniCPMModel`]. It is used to instantiate an MiniCPM
     model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
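With the config class renamed, AutoConfig now resolves through the updated "AutoConfig" entry in auto_map. A short sketch, reusing the assumed repo id from above:

from transformers import AutoConfig

config = AutoConfig.from_pretrained("openbmb/MiniCPM3-4B", trust_remote_code=True)
print(type(config).__name__)  # expected to print "MiniCPM3Config"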
modeling_minicpm.py CHANGED

@@ -979,7 +979,7 @@ MINICPM_START_DOCSTRING = r"""
     "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
     MINICPM_START_DOCSTRING,
 )
-class MiniCPMPreTrainedModel(PreTrainedModel):
+class MiniCPM3PreTrainedModel(PreTrainedModel):
     config_class = MiniCPMConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
@@ -1075,7 +1075,7 @@ MINICPM_INPUTS_DOCSTRING = r"""
     "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
     MINICPM_START_DOCSTRING,
 )
-class MiniCPMModel(MiniCPMPreTrainedModel):
+class MiniCPM3Model(MiniCPM3PreTrainedModel):
     """
     Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]
@@ -1239,7 +1239,7 @@ class MiniCPMModel(MiniCPMPreTrainedModel):
 )


-class MiniCPMForCausalLM(MiniCPMPreTrainedModel):
+class MiniCPM3ForCausalLM(MiniCPM3PreTrainedModel):
     _tied_weights_keys = ["lm_head.weight"]

     def __init__(self, config):
@@ -1465,7 +1465,7 @@ class MiniCPMForCausalLM(MiniCPMPreTrainedModel):
     """,
     MINICPM_START_DOCSTRING,
 )
-class MiniCPMForSequenceClassification(MiniCPMPreTrainedModel):
+class MiniCPM3ForSequenceClassification(MiniCPM3PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.num_labels = config.num_labels
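For completeness, the renamed classes could also be wired into the Auto* machinery locally instead of via config.json's auto_map, assuming the two modules are importable and that MiniCPM3Config.model_type is "minicpm3" (the model_type value is an assumption; it does not appear in this diff):

from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
from configuration_minicpm import MiniCPM3Config
from modeling_minicpm import MiniCPM3Model, MiniCPM3ForCausalLM

# Standard transformers registration APIs; after this, the Auto* factories
# dispatch on MiniCPM3Config for local checkpoints of this model type.
AutoConfig.register("minicpm3", MiniCPM3Config)
AutoModel.register(MiniCPM3Config, MiniCPM3Model)
AutoModelForCausalLM.register(MiniCPM3Config, MiniCPM3ForCausalLM)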