Llama-3.2-3B-Instruct-MLC / ndarray-cache.json
rohanprichard's picture
updating files
17e4d3a
raw
history blame
121 kB
{
"metadata": {
"ParamSize": 283,
"ParamBytes": 1807423488.0,
"BitsPerParam": 4.500626782697164
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
128256,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "3897af73cdd58ad92272571d1300a477"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 24631296,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
128256,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 24625152,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 24625152
}
],
"md5sum": "5ac94bd3abfff44fc23c41e36d66afaa"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ecc48d1526a57c59700a7a00714f2e44"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "e1ca271e067e6656ca4ab36edb1ca6d1"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1c1f6d4dd70274229354544ff522e1fd"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "1b373ddd9beff49be6525e49294a5855"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f22d16eafe23683b74160ccbd1b53287"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "2f7948f056ca95c7827c811b3cfe72b4"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "620ff5954a80756608e5da8e163c9e97"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "b1f2d93d2f26ecf6b14fc86099913227"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ac5f9c3c8d4aaec671be2a51277d3672"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "b27e1e1b1f88e2672e4fbc8c9067d930"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3fb46147509cd1fd2a6d16cac1f319c5"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "ce1c4a27b33e6e02237f4d4fa8cc87a3"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6eaa627b6c5add6eb644a3ddf70bd87e"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "d1b2de557c5de3f79370980f14791b9e"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7a076964808c2471bd779b3095591cec"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "4ebcb62c7a0d3a0fa5c902646f1c02c6"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8647182f23e25657e0eef4226c128358"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "c976b6646e8b72ea62bcd07dcd21becf"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e063d2b2913d45840aeac82b6253528e"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "328e006ed4ca26e3a101d07005f619bb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ed4aea884d7a52bcf738cd8165f07e47"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "f133b6e62a2a80933060b943e68cf6c7"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bcba670de6bed8d2472429796a8aee7c"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "66c78cf7715273c5a41873d4261d19c1"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b41b91914f85719aa35b99c141d87d48"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "089f7ce8f169578db28a929212269ceb"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 31463424,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
}
],
"md5sum": "1db4c69f3658a4875a58d164db882eb2"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5ffe8a30519558d7fc43015ff3a7722a"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 31463424,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3145728
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11010048
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11993088
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16711680
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17307648
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29890560
}
],
"md5sum": "4ddf2fdda491d56c5473498b08fb015b"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "eb4e7a9e9256852414a565e5f7075440"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3145728
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3151872
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11016192
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11999232
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16717824
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17313792
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29896704
}
],
"md5sum": "7ac9e3430016c6b088a3ed27541a6db8"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "66e6ed9813c385d620a4f960c527a627"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3145728
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3151872
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11016192
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11999232
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16717824
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17313792
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29896704
}
],
"md5sum": "12f8da0ff107fd5c187ca4bafa48cbd6"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8cd1567599aa15618c506d9e560d045f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3145728
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3151872
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11016192
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11999232
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16717824
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17313792
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29896704
}
],
"md5sum": "23329609d4a0b9ca1bdd47c5232eea1b"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2ad30c53be807a6cca1a48d938652e09"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3145728
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3151872
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11016192
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11999232
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16717824
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17313792
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29896704
}
],
"md5sum": "88d3eba3ec988210a61f339062eed457"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2230e24b59fd48363392e27a87cf7aea"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3145728
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3151872
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11016192
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11999232
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16717824
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17313792
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29896704
}
],
"md5sum": "cfc09d309063ae2763549b68efa25801"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e12d9abbd9abf3b5d14814102602b367"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3145728
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3151872
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11016192
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11999232
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16717824
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17313792
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29896704
}
],
"md5sum": "f167d9a8a0b5b521be424b798e3bba3e"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 31481856,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3145728
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 3151872
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 11016192
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 11999232
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16717824
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 17313792
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 29896704
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31469568
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31475712
}
],
"md5sum": "b8350a849d54f28dba386ec04f60e85d"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "efefaf4739c17931027b85482678836a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "912302f09dfcdb33422ee1ca4fe55632"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "64a20e412576ad25cc775b9acb406751"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "746d57a96ee03acf6a72b39853317aac"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "03d5b07107c9a325aad9052729809052"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "f3f2675384ed873ea7c3100b2cdcbb27"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3afed361f4bc03d92fc60723c0a2ba01"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "25df7b6e2aae12ca096922cd8ff48e02"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5526494f18f6243bc2f2cbd5a53d28b7"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "7c0cf55be3055f7108f471dcd678ce1f"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cbc8e1289bbfc4e34b8c7bf705994e3a"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "69ebdc997e003d635d0540d02495c8ad"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bfa31987e09243a2a6f8e2c99f43428b"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
5120,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 7864320,
"byteOffset": 17307648
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
5120,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 983040,
"byteOffset": 25171968
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26155008
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 30873600
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31463424
}
],
"md5sum": "615004e4bdffa99900b1cfbdebf3f219"
}
]
}