|
{ |
|
"metadata": { |
|
"ParamSize": 325, |
|
"ParamBytes": 2388848640.0, |
|
"BitsPerParam": 5.001410952042906 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 49250304, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
32064, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 49250304, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c48ad0d40f422790c21a880c1043fc81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b9db0882d61aef6f8fca7c2bad2bcfc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23470080, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
32064, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6156288, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 6156288 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 6162432 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 18745344 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 20318208 |
|
}, |
|
{ |
|
"name": "transformer.h.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 23463936 |
|
} |
|
], |
|
"md5sum": "6ee2a7e9ee333e0a9bb2ffbb00df0545" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e73dee0e275ac84789cb48b10707defa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "c9426a31c4ebff2a225c6c63ef38cbc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "f80db92812944298816b016eb320cd22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "70aa2a56b1847beb966901e2b5576a38" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "111b02b373084b7acd60bca92a8c5b0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e57723f8a4815277db885c34891470ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.24.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "33e3893626821731a312a6ee3ceaadc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.24.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.24.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.24.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.25.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "75d9c27900b20b1c163b19a3a4e1e173" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "17f41db072699a2e4e6524cf98741d65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.25.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.25.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "9697d04cc496020aa008fb38ccbf69c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3fcb3a7efe21e02331150774b6846ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.25.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.26.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "1bc789fb183c399ad93a60d42b4f4a0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.26.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.26.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.26.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.27.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "fa20dec944b4b13bb9b6b36837af00a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac2405e0f5171a6182e835d81def8c18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.27.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.27.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "41288f1626ad1f73dd3bf98ba7e974ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af158b92a69525946fd2696fab406a30" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.27.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.28.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "091d97272462b03d6e4056369ea9b873" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.28.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.28.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.28.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.29.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "1ac6f0b43c40e3af715dfb8cbfc616b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89466f384e1c8d0a9bac2d9cd2576c4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.29.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.29.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "f96aa8657139aa1a9dbe347941008d6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "084937b157648c3df396d82927d8770a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.29.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.30.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "3dd6e842ddb875ada6de9c1d979c52a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.30.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.30.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.30.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.31.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "fd63f12b063b1bfe87331fd928b5c0a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "27f6ee76d1c0b57ab3e3ab7a70c7ed8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.31.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.31.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "775b10431acaeda4779df09a61c2974e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 49250304, |
|
"records": [ |
|
{ |
|
"name": "transformer.embd.q_weight", |
|
"shape": [ |
|
32064, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 49250304, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "06645f1e6d7fb3b8bd7940b8a7999053" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22093824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.31.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.norm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.embd.q_scale", |
|
"shape": [ |
|
32064, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6156288, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 22087680 |
|
} |
|
], |
|
"md5sum": "4a43ad66fd7aa84dfcdaa4ae2d0443ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d655e0ac48cebb795c0a49fc74f76ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "8353030b08d0b87282dd29dd49f026a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a31a9b38c8f7afa0a4c301439c8813cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "b5b2da55e446706140f5933102dbd1dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "169301f81d7e1af30c220c6b7ed18e74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd2c62ca4885217e4e28d03ce315866d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "90ac251eeabcab1d828b981df63e73e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05ab992c64f72440066a8c5ef703cec4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "6824b3636d94f5ff4095915ae1bd1d01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "b409bf19f4ddc2fa25d522098d2f0778" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "396a5869ae6e4933b9e6c1237d17f775" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "53949c57ca9424fdb4ac8a72b81ed6a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1a69dfa9bfda80a18ea347c234db98b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "08a4fcb90a8dc8a1b6261937deb97067" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "a1dd109167140b273813782c0c23a0da" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1dac5c2879e7b571b3d95233032ccd5b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "35029506108517ba107f28e9e95e8101" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8339ac2435da45d72ab0ed1ba57eb38f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "90be9403a0c9f548ea585d86fc742149" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "56f510dcce07f501f016e8b50fdb93e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b7ac1dadcd83f3b1eafa0dda605c43e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "89f82be3645b4530cb31cde5e308f18a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef270ab396a9e9eff63d76f3402aa757" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "803a95f566aa10e80ed4e3be5aa4e4ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "ad916e6dee1636d5df609d9be2de155a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "571a9a2509c2f5d84637f92ead665b28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "3de4c37bf3c5af2ee62814c93f2bd07d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f142254078c28376f6c0bbb6f03af225" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "14463e8e243f17e339d7f05b25172b8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "a1617efdab979e1362d76918e74abeb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c1ffcc33c6e8307148bd393ab03d74aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "b85dcb68586c9a679a963f6135e8e6a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "660a1e5ba98609962fe41e44dd88bc8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "3473869890d52a464fb3a090c7e2f573" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 26548224, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 21233664 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 25952256 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 26542080 |
|
} |
|
], |
|
"md5sum": "230091160c28bc1a6d4087c0e320eb74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6d4edacefc9336ba9f5cafd004016e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "8df4d3e9c3af14f55e4a6b6b7516dd93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b031d833cd42535f0cba0c9ec2ea16c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "5a2f4211261e5d167e2d3ea35ff820b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "e2046fc39e04e4827353ab4405b18feb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c908989f4f49929d27cdabf7ce4ad4c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "1c7fc776645b6efa60e72b4ab465939c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e03fad170f58719e62ebad62a0bd87ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "97965812ae11287d886d875731f91b3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "1b60d1f9974d262259ff4d01a0518441" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90dbd3f24410567c7a2acae21e974c64" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "468c8fe93c1e0f754ad70ff158c3f15c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "efe7fd6dbf78c18a920f768e6bee8283" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33239040, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 15931392 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 30087168 |
|
}, |
|
{ |
|
"name": "transformer.h.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 33232896 |
|
} |
|
], |
|
"md5sum": "a53f932966ff79c34fd7c0a30332e9b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 21239808, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 4718592 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 5308416 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 19464192 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 21233664 |
|
} |
|
], |
|
"md5sum": "315df597e452764f9ea6c27203adedde" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
16384, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2f6a88a7ed70b27e02ee97521e80ae0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22616064, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
16384, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "transformer.h.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 17301504 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4718592, |
|
"byteOffset": 17307648 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 589824, |
|
"byteOffset": 22026240 |
|
} |
|
], |
|
"md5sum": "09f175ac1c68ebe016b15a6d87156a6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 15925248, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.qkv_proj.q_weight", |
|
"shape": [ |
|
9216, |
|
384 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mixer.qkv_proj.q_scale", |
|
"shape": [ |
|
9216, |
|
96 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
} |
|
], |
|
"md5sum": "b729a4f201e3e5acf9d7d5ab5929b661" |
|
} |
|
] |
|
} |