SmolLM-135M-Instruct-q0f16-MLC / ndarray-cache.json
CharlieFRuan's picture
Upload folder using huggingface_hub
7a7ec28 verified
raw
history blame
70.3 kB
{
"metadata": {
"ParamSize": 182,
"ParamBytes": 269030016.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 56623104,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
49152,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 56623104,
"byteOffset": 0
}
],
"md5sum": "141f0fd078335e7280675bf001e765d7"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 30091392,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 1152
},
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 1770624
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5309568
},
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 5310720
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 6416640
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 7080192
},
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 7081344
},
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 8850816
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 12389760
},
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 12390912
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 13496832
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 14160384
},
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14161536
},
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 15931008
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19469952
},
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 19471104
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 20577024
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21240576
},
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 21241728
},
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 23011200
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26550144
},
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 26551296
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 27657216
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28320768
},
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 28321920
}
],
"md5sum": "8c3c1c10e3542049f9b5c4407d6a8b73"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32966784,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 3538944
},
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 3540096
},
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 4646016
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5309568
},
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 5310720
},
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 7080192
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 10619136
},
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 10620288
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 11726208
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 12389760
},
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 12390912
},
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 14160384
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 17699328
},
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 17700480
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18806400
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19469952
},
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19471104
},
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 21240576
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24779520
},
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 24780672
},
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 25886592
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26550144
},
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 26551296
},
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 28320768
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 31859712
},
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 31860864
}
],
"md5sum": "6519e54a2545395d82025fcf1405074f"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 30754944,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 663552
},
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 664704
},
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 2434176
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5973120
},
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 5974272
},
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 7080192
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 7743744
},
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 7744896
},
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 9514368
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 13053312
},
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13054464
},
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 14160384
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 14823936
},
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14825088
},
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 16594560
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 20133504
},
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 20134656
},
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 21240576
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21904128
},
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 21905280
},
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 23674752
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 27213696
},
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 27214848
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 28320768
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28984320
},
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 28985472
}
],
"md5sum": "c5cdf4241a2746a9ae96a1fcd38def48"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32966784,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 3538944
},
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 3540096
},
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 4646016
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5309568
},
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 5310720
},
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 7080192
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 10619136
},
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 10620288
},
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 11726208
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 12389760
},
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 12390912
},
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 14160384
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 17699328
},
{
"name": "model.layers.22.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 17700480
},
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18806400
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19469952
},
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19471104
},
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 21240576
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24779520
},
{
"name": "model.layers.23.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 24780672
},
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 25886592
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26550144
},
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 26551296
},
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 28320768
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 31859712
},
{
"name": "model.layers.24.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 31860864
}
],
"md5sum": "ac1e816d1b0e2b856a8051bc989d538a"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 30754944,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 663552
},
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 664704
},
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 2434176
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5973120
},
{
"name": "model.layers.25.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 5974272
},
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 7080192
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 7743744
},
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 7744896
},
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 9514368
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 13053312
},
{
"name": "model.layers.26.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13054464
},
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 14160384
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 14823936
},
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14825088
},
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 16594560
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 20133504
},
{
"name": "model.layers.27.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 20134656
},
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 21240576
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21904128
},
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 21905280
},
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 23674752
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 27213696
},
{
"name": "model.layers.28.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 27214848
},
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 28320768
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28984320
},
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 28985472
}
],
"md5sum": "04c8381ac24caf05a76dcaaa0e803fd7"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 32966784,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 3538944
},
{
"name": "model.layers.29.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 3540096
},
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 4646016
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5309568
},
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 5310720
},
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 7080192
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 10619136
},
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 10620288
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 11726208
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 12389760
},
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 12390912
},
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 14160384
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 17699328
},
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 17700480
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18806400
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19469952
},
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19471104
},
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 21240576
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24779520
},
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 24780672
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 25886592
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26550144
},
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 26551296
},
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 28320768
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 31859712
},
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 31860864
}
],
"md5sum": "07720a49a20ad070ec3509804021892e"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 21905280,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 663552
},
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 664704
},
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 2434176
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5973120
},
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 5974272
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 7080192
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 7743744
},
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 7744896
},
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 9514368
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 13053312
},
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 13054464
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 14160384
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 14823936
},
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
576,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14825088
},
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
3072,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3538944,
"byteOffset": 16594560
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 20133504
},
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
960,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1105920,
"byteOffset": 20134656
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
576,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 21240576
},
{
"name": "model.norm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21904128
}
],
"md5sum": "80c76f8ae646867d38922ce541bf65a1"
}
]
}