CharlieFRuan's picture
Upload folder using huggingface_hub
2630303 verified
{
"metadata": {
"ParamSize": 399,
"ParamBytes": 1929527296.0,
"BitsPerParam": 5.002114406234114
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 155582464,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 155582464,
"byteOffset": 0
}
],
"md5sum": "fda9f3ad990f79e4ad5ce7cbf9bba25a"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5c172bee39132457d744d1409f75812c"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32133120,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 19447808,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19447808
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 19451904
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 30724096
}
],
"md5sum": "40fb89a389eb44e825a12b285ff09ca9"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d271d6fb298743cc26f5c98aeadeecbf"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "ff76cfb2273c800110970be6edc03288"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3e4c7c1dc71a952aa6c8b6be782bd683"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "82a9e469bf342e2a4e5e281a76569700"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "864d39baf7b70086bf783ea9842fbdfa"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e9721e4de2bbfe51c8b5e979d1fee9de"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "60b8ce1c2affed1e51f4d5b157208108"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d83d3d51deb5106f6383cadc44670bbe"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a6923c0e9b15a5d6a72c7de8263aeae4"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "068cbc345de167ccddbe9fabed1930e4"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "00d3d8622dce76de970fb45064877c36"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "d52c875651ec3054134a80193d022ee1"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2fba818f33e45537152a123fd027c824"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "df34827a58c58061632f605ae7238627"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "996ac335a40811d58114f10493e9d1fc"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "df3406fa6b41994c6a88f478b03473cb"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "032f55c582305debba0c4ba8ea2ee99a"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "884fc6bd06655525ccfedbce46eb5b1e"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "91a9a6c0f18a080858ebc1a2c12d5125"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "34ea1dd0469a8ce3a3af3ddaf9690ce0"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8f82321929c2ce70fbbc00e5c7ae5131"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "59f0808eb4c73f9c3fc3d141da9786d4"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "42e630fd4b00eebd399529140e38841e"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "981fa8927ffed2115bda0649367a3c7a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "99df784b181d61de2e5213ab3a745a72"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "bdcdb7ed62905a50a549fcf2b71e314f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "e69d03292e1378658ba95d9027e75acb"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "24c0e4624fafed3b6e80afde314ed9ae"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "fcacfb0345c00b4794974d399b99b398"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "0058cdbcf2f29f0c8c9008cf03d764d6"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b47b30cd98c2fa5770a54173a08365db"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "a1c0d2af51d815ac854e394f7d2ec327"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "39fff054623676653b7ddd633bb6adcc"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 26134528,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 20816896
},
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 20822016
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 23443456
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23771136
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 25868288
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26130432
}
],
"md5sum": "1fffc6d27d9cc240b10ce8b091d1ea5c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2ff05aea6a7cd0ca213c564c07125349"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "247ac5e71630a67eaf548697af2f2f2f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "a42ef72dddbed4b073772f6d4f08186c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "77ac5a82c20ad02b596e252097e91a59"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "2e399ab5cff08b122e0457ec24f700d7"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "831e2782610ec294352d7514b2d26d30"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "03e5e698f8d856bbb4fc0b2927467783"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "528cae6e9d10092b4c5069892d14791a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "508752b8e333419aaa747d3a432609f5"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "8d6006a33d90beba0e5cdb6566469608"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e7c6f4abb3db0d8b0ee443db9ae65974"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "dd6433cd537d2fe52b48441c8cf1f419"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "af7c5887c7970497bdbe2ff767597eaa"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8f930b512a824b33651b5a61cdb01dad"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 23647232,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2822144
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 2826240
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 14098432
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 15507456
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18325504
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 18329600
},
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 18334720
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 20956160
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 21283840
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 23380992
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23643136
}
],
"md5sum": "1d58453be4360a0b7124b09ba9095cd5"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2b86659a645027cd0bad250a634e554f"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c3d6b80a7fbbdbe94853190989851134"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "c2096a4c6fcc31fa64fb9a77d6d24431"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5bacca279f2e2c2b1b8b163efe1b6647"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "107d466099c5f8def3e93087d09a7c79"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d58f2120bbe70dab7b17e46364fcc5fd"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "02546e437656d4e7929772d5b9a7dab1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33502208,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 11272192
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12681216
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15499264
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 15503360
},
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 15508480
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 18129920
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 18457600
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20554752
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20816896
},
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20820992
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 32093184
}
],
"md5sum": "4ca6904e1d8525f0f6ba12f5e7966832"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
22016,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2cb7a757cceff5e6e5249c9a925f54fe"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 28960768,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2818048
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 2822144
},
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 2827264
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 5448704
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5776384
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7873536
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8135680
},
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
2048,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 8139776
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
2048,
344
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1409024,
"byteOffset": 19411968
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
22016,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 20820992
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 23639040
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
2560
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 23643136
},
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
2560,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2621440,
"byteOffset": 23648256
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
2560,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 327680,
"byteOffset": 26269696
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26597376
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28694528
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 28956672
}
],
"md5sum": "8bf5c530f740f433a0f3004c9213a85e"
}
]
}