Qwen-14B-Chat-q4f32_1-MLC / ndarray-cache-b16.json
hangruicao's picture
init qwen 14B weight
521a7a4
raw
history blame
202 kB
{
"metadata": {
"ParamSize": 445,
"ParamBytes": 8858030080.0,
"BitsPerParam": 5.001961295228238
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "9924dae6de4c54615afce8ae05066461"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "88339473662d52ec1a37a2715afa0b85"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.0.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ee9431a17d9b83b4bdd98e97aa2439d2"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.0.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "537211d63edb83dbf6c568af82f95285"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "transformer.wte.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "8b8cfdbe3a3a677776ff7a9e363ce0d9"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "transformer.wte.q_scale",
"shape": [
152064,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "324e084c2d9e2c6461b1c42dc465f5e2"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.0.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "6b72494212f00e7a6a750a4eccc441c3"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.1.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d7252f03f3647a12fc5ec786b1b9a12b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32890880,
"records": [
{
"name": "transformer.h.0.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 0
},
{
"name": "transformer.h.0.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 30720
},
{
"name": "transformer.h.0.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4945920
},
{
"name": "transformer.h.0.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18053120
},
{
"name": "transformer.h.0.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19691520
},
{
"name": "transformer.h.0.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19701760
},
{
"name": "transformer.h.0.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 19712000
},
{
"name": "transformer.h.0.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 28477440
},
{
"name": "transformer.h.1.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32860160
}
],
"md5sum": "c7bdc4e1ba73d234884b5aed1a31c56f"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.1.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "c88328f032eb0d236094664130f7e24c"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "ad37a47ca1b0367bfeb0e8d8708c774c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.2.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1ac414f22bea070f0727918b198f964f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.1.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.1.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.1.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.1.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.1.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.1.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.1.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.2.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "94f2eb0ea786ad2679fa8773488228ba"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.2.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "b7e6cb2dab84c79b9d2368f1ce6ddaa1"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.2.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "82b55b58573f63d67f067cb09799cabb"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.3.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b4ad8c927092b1f9eec186dc3f8b0576"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.2.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.2.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.2.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.2.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.2.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.2.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.2.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.3.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "67eb261d0fa2d11b94e0e908a843b0f7"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.3.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "85da5f11de4fe3d28f4894506f84167d"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.3.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "4ae56825e933c9f7f701786fe0f91d61"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.10.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e0f3a0f7f97375f623c60a8f338539f0"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.3.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.3.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.3.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.3.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.3.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.3.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.3.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.4.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.10.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "0cabafe99e059ec0c69286b251dce55c"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.10.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "3ad33a1e132105e97744449d5af46a72"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.10.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "7836627647ca04d6822f13a912a83768"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.11.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d81d831128a08f49c89ee5e2c700a854"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.10.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.10.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.10.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.10.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.10.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.10.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.11.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "5f5d334660fad65b7dc6480e89b8b6bf"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.11.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "6a17a528a479b7ba9de612572d9b4eeb"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.11.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "bbd5157c59c410297382b4bee47cbdd7"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.12.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0a6029076418c278d9264446b9a2fa46"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.11.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.11.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.11.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.11.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.11.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.11.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.11.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.12.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "602b76b5dbc191663ec7d29a4e36a23d"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.12.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "9bfb30b01eec70d50a0da68e565a85d2"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "04af42e46111ca664e57348bbd969147"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.7.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "4772f5cb0dbc58d77ff1fda7f01cbc33"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 32880640,
"records": [
{
"name": "transformer.h.12.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.12.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.12.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.12.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.12.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.12.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.12.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.13.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.10.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32839680
},
{
"name": "transformer.h.7.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32849920
}
],
"md5sum": "7e9e42e0e0bfaf0294428fdeb32eff1a"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.7.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "154ec2129f5008be74c9c88473da31c0"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "4bc9664640c048363775de60eceb6729"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.8.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ddb8f93509ddf4704b40d002e4328522"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.7.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.7.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.7.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.7.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.7.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.7.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.8.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "771dbf90de3956bb58c84fea5abd6933"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.8.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "58e190e766c68e54279068b2c01b89d4"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.8.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "b558e342db5bf000866ecb38b2c33fc7"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.9.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "76d528af3ab61882d04c14129ecbaa7e"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.8.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.8.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.8.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.8.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.8.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.8.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.8.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.9.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "b41c1d35686cde59a2594e504fcc3a68"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.9.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "5bbdc0e323687d2d011e0ceb47faff8b"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.9.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "8ec7b2f9c10945b4b1f5e2b28652160b"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.13.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0187d5461db122e76cbfdd2d33040a0f"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.9.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.9.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.9.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.9.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.9.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.9.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.9.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.13.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "de93cf594921a28049f9bd86cefbaafa"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.13.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "5ea625bda03ef285ff47510e141625d2"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "dbb88e60327cf494ca34956740652df0"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.14.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e246abbdcc9023d7bbbe14bfa4841570"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.13.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.13.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.13.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.13.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.13.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.13.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.14.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "22c9e2927398e2e24389a6310bda41c9"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.14.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "56e08c3d62cda55baad64ea4ee55d184"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.14.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "5d6c612b1bfe9459f92250d67d37795b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.15.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "5733ef99fbefae9c94aa5491d39f2e19"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.14.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.14.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.14.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.14.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.14.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.14.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.14.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.15.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "362560a5cbc994a8e0c475729b81ed06"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.15.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "49edbb03ad45d41e702e24c775ef299a"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.15.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "9795df5a2039599f344afc928abd7b72"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.16.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "f49ac286f6a9b18339cd4966906cddc4"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.15.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.15.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.15.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.15.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.15.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.15.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.15.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.16.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.16.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "1ee346cd515412f74afea6375906e886"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.16.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "7d118c520c50f57c6169b1ed98fcce7a"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "84a6024bdf36187d58c1ad3feec3d56b"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.17.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b2f508e836552e854a071023b55f65cb"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.16.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.16.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.16.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.16.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.16.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.16.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.17.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "3c414cb65c4e802ff78b9bc15346c424"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.17.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "f393b00ad4b314d17c425b1c22096596"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.17.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "f0ddd28a7cd4e9d79ee46df0655313bd"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.18.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1fc631e642c4d83f265935a8b8f68fb9"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.17.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.17.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.17.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.17.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.17.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.17.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.17.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.18.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "7d8446174811d00ca88eff6df93bc597"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.18.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "3fb69fced6c97e4236099245d12eb426"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.18.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "17c8ff566e42467494d562755f609170"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.19.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "1566725ff59a53395373f4574cb2313e"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.18.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.18.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.18.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.18.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.18.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.18.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.18.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.19.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.19.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "e8fcbc8a1d5a277e398e92cc7a677a1e"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.19.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "5c1a95952256a5498ccfb94b5aa2260d"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "4423eb0247c9a543c5a79462fd87e0eb"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.20.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "03810ea94fd581815cb70d30e3dc7b4a"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.19.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.19.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.19.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.19.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.19.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.19.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.20.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "86a515e5a725863863593968efc56935"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.20.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "afe25494a905fadbb88c3a9645eb27e6"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "e771c7c8760b79e591ae5880afc0053d"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.21.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "cb46f1d8d5bba9c760387b07b78187e9"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.20.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.20.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.20.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.20.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.20.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.20.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.20.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.21.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "d2bfa31f4edbe05c06ff2d4cd81fb390"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.21.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "3f093ea1a6ba9ddc1269e2951e893335"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.21.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "4a129e267ecde19993252b11bc092c4c"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.22.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "3c100126712a1bec0b381d503bbfbf72"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.21.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.21.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.21.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.21.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.21.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.21.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.21.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.22.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.22.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "41b612ce189209b218b81314037635ef"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.22.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "a56418a34f4e4c70eb97735631808123"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "94a1350b04b471f47bc8270d65fb627b"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.23.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "efd753fef61814cba77b344cc53b9212"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.22.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.22.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.22.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.22.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.22.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.22.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.23.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "a1bab9dc8d12061a979d585126bc2471"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.23.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "b965a19263720dc3c853c2c12f665980"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.23.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "32127bcd4d4ea161cc8ac91cd8d82286"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.24.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "fc1a918f79cf48da60dc3d54822b4471"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.23.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.23.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.23.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.23.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.23.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.23.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.23.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.24.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "15d2637387a9a4b6ac34e7471a6b0dfd"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.24.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "42e337db1656cd9191ffa874d3860c5c"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.24.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "f643f01316fd7637f83979fe4f4a5994"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.25.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "be68d4c2fad011d74f7b2e5e46e797f0"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.24.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.24.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.24.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.24.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.24.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.24.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.24.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.25.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.25.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "e954fb0120a16a7fa91a8ddf254433e3"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.25.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "68d175eee215a0a5b7110b5875d5ec71"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "41aa78ae12b594fa23df8aba895eabff"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.26.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "8281507a523934f0591ef7bfb71244dc"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.25.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.25.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.25.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.25.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.25.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.25.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.26.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "d837253c7b99f6629e61910c3df01017"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.26.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "47a84cd555481d8fbb57054ddfffeb58"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.26.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "6c83a5f59329e0923b312c830379b0e0"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.27.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ebb58a78a014c8c594bcf18d30c7b4ad"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.26.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.26.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.26.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.26.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.26.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.26.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.26.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.27.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "18a7df21df7532130e0ce3e3a0f30199"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.27.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "e1c96efddfa353a029ab00637cace2b2"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "6317fd6cd8df92618465ba5779ff7044"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.28.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "37eb4e615b7b59861439ac97dcb03538"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.27.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.27.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.27.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.27.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.27.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.27.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.27.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.28.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.28.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "c37ad6ba8dcc374a9ab9e1943379708b"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.28.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "423378676d449ae03b82b060328312bc"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.28.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "e63e805c892c5dd63da704e3d4d60df4"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.29.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "326543bb857bbda805c00f686f2efdda"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.28.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.28.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.28.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.28.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.28.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.28.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.29.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "e7a30740f1cfaaebc41ff2cc7f9cf35d"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.29.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "9a87770823982848b2a8d785c0467ef7"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "9b3b9f366542d5958b5a7632da0d43d6"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.30.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "cf798b227f9a5d6b51a6fca58a3ab76c"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.29.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.29.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.29.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.29.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.29.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.29.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.29.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.30.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "9370eee805f98c513e84dd73f7aad115"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.30.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "a09dce6ceaf4955a4a029eacbc9262b2"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.30.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "f608e1f10a13c182a7e1f6812a14d806"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.31.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "66d82ee61d40b5f9cb0577a531896147"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.30.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.30.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.30.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.30.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.30.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.30.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.30.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.31.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.31.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "c9568a1b04035b1eb5415d7b395f0ba4"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.31.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "b3b3f1b7845063465589a8ef2a8a80d4"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.31.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "c75969c1dc97ae3ab733a26ef0352de7"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.32.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "df74d11b514a054e9c68c5f7f4c5f15a"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.31.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.31.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.31.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.31.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.31.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.31.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.32.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "27ead50f2cd3e52b785c732da0040830"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.32.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "be192cb6d159f7813a0cf5e8dd5178c5"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.32.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "82cc8a93a394e6059d7aca306308f3a5"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.33.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e848fd18de8c098e8e90b380a51983e3"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.32.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.32.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.32.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.32.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.32.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.32.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.32.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.33.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "4ec3e90791139e43bbb94e490ff523a1"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.33.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "fc6aac577987525f855681b3755cf0f1"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.33.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "63687adca714c6861265508d9d8aba1d"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.34.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "2c38fe49bb49dbb4a320ded823e8ba8d"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.33.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.33.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.33.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.33.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.33.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.33.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.33.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.34.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.34.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "ec4e8c3f94e3afd854a8b38a35d72cb9"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.34.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "e359cbd903d54240a0d67cb9a14f15be"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.34.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "42f303933d372455229d8a42135eae8d"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.35.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "8c2dcd44bc8be6358de5b840f1e77d4b"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.34.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.34.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.34.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.34.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.34.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.34.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.35.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "b109893d7101eb36cd1b8297701aa8fe"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.35.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "af9d0b5ccd5033274aa007467538179e"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.35.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "30b980280c0ebf3ba406bd3557a5b6d4"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.36.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0c98602a827b55ca3a3be7b424ea9ac3"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.35.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.35.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.35.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.35.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.35.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.35.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.35.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.36.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "1f68e15c80896acb7699d006c4174685"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.36.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "224fc0184765b20a54c7d8749b6f886b"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.36.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "5cc2ef522e76e2a8a825634e849290e9"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.37.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "19ee4f92095f637d12199f4f0fd576db"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.36.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.36.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.36.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.36.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.36.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.36.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.36.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.37.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.37.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "0f506a1f65edb8a9306080bb4000726f"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.37.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "f296779fdfe39794398505fe591fad86"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.37.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "38dd99e729075fc31c1013882ba69c50"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.38.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "f8125b6314383c8ed6c47966d92632d8"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.37.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.37.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.37.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.37.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.37.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.37.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.38.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "d1ee4b7e370ecea81645b52ac4bc713e"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.38.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "8ea1af26e5c68b71aab2e42b61394a87"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.38.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "44090d401acf8796c78359475002a704"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.39.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "42fa7de06043cc49ff3e0b234a5ed40f"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.38.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.38.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.38.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.38.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.38.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.38.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.38.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.39.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "abf50d18a699519ae9ffa643c00e894b"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.39.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "b8655dff40941641c56e4accfb3801e6"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.39.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "186a8808fbab298e1090750e54222f32"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.4.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "96889b9c475223619d7514f55ef9df72"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 32870400,
"records": [
{
"name": "transformer.h.39.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.39.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.39.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.39.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.39.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.39.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.39.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.ln_f.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
},
{
"name": "transformer.h.4.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32839680
}
],
"md5sum": "335c2c770ef99da13a11bdc59e389087"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.4.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "490eda10b06c0404a0e31cbc1a6de754"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "da6da39bbfdbf4e3d20673d000570662"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.5.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "2bfc228f3eb02dbcc948a4ea5aed8da9"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 32849920,
"records": [
{
"name": "transformer.h.4.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.4.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.4.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.4.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.4.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19671040
},
{
"name": "transformer.h.4.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24053760
},
{
"name": "transformer.h.5.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32819200
}
],
"md5sum": "18c52e7d8383b663da1178008b601ebd"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.5.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "3eef47186928c5f7abeeb78e6821339c"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "a558a3d56fb45775daf4c8b9ef6e5603"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "transformer.h.6.attn.c_attn.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "f4139cd0409b2cd37ee064c78c0f34cf"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 32860160,
"records": [
{
"name": "transformer.h.5.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.5.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.5.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.5.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.5.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.5.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.5.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.6.attn.c_attn.bias",
"shape": [
15360
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 30720,
"byteOffset": 32829440
}
],
"md5sum": "c91a62585c1792f52555e74075479d61"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 35061760,
"records": [
{
"name": "transformer.h.6.mlp.c_proj.q_weight",
"shape": [
5120,
1712
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35061760,
"byteOffset": 0
}
],
"md5sum": "34e32044b39fde438ec4f57083d841c2"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 70123520,
"records": [
{
"name": "transformer.h.6.mlp.gate_up_proj.q_weight",
"shape": [
27392,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70123520,
"byteOffset": 0
}
],
"md5sum": "d2fffe239acd9ec6fa731dbfef9b6300"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 32839680,
"records": [
{
"name": "transformer.h.6.attn.c_attn.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 0
},
{
"name": "transformer.h.6.attn.c_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 4915200
},
{
"name": "transformer.h.6.attn.c_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 18022400
},
{
"name": "transformer.h.6.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19660800
},
{
"name": "transformer.h.6.ln_2.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 19671040
},
{
"name": "transformer.h.6.mlp.c_proj.q_scale",
"shape": [
5120,
428
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4382720,
"byteOffset": 19681280
},
{
"name": "transformer.h.6.mlp.gate_up_proj.q_scale",
"shape": [
27392,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8765440,
"byteOffset": 24064000
},
{
"name": "transformer.h.7.ln_1.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32829440
}
],
"md5sum": "d9df0d9beaa754df73105d78d630bf9d"
}
]
}