{ "metadata": { "ParamSize": 125, "ParamBytes": 1513693184.0, "BitsPerParam": 4.500437647753687 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "a5721d9560034ebdaf9d54846920d333" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33357824, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 8192000 } ], "md5sum": "5552f5612454729423354be2af931df1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ba27e916567a0c6f37f181aa8df94d1f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "734f8aa85a33629a93177a47c054492e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "13ff761572920e237be3b62d59cbd4e0" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "1d1b5df3e123de5bc1fbb1993222d84b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "95d00be34be16fcf3a2fe2302724e82f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a422e8e8913d0267583354c6daa769a3" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "b7017825ad7f81708e314fcf9d7cb0de" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4b91662b6e1ee4e7b38296ec31b72bb1" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "852bbb7402bea2c0b6be3265d3d3c85b" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7e293d65ec025332caa0bbf0809fd83c" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "05b55ef5f5e542eae1880b70a3058648" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7a434038b09c4c078fcf530723227ae7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d568ff3dafce1741101c8e53b9bf5782" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "b8783055d9133af4581ec9c708e2e387" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f7e0999619a4b8fcf8ed2c4d3a43dc84" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "760ea2fa6f4a377b77a3ea3931c01a9d" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "70a3d08ba73b8b54807c494e0fec7c64" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "32bfcbd2bab849060a7708b6a7c733b9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e8d3b2a91b3f9dcdbafe5429b4ef3532" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1fa932714da6576f3d25c1d8f2383755" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "1192f4b46399ee1f4117a3873193a3cb" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fff6ec2b589e78fbac6e8f71c361c792" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6348bbf89535736f69b5fbaf16beaffb" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "97a5111ae4c3eb75a706632a87dbb4b6" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "17b99865e224249e9c4a5724abef7548" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "dee2f47ac2113817449ad4037f14cc63" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7a5feeebfe6442bcc4138fca9dc50b88" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "b00d0bdc57592edb480b7924e7c93bfa" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8d7931724b0b7b94de937af5b59bcc8f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "8cd54da1c291ad7d61883e5e4aba30f8" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c8caa63d672fe772e6b9a4c4146d8671" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "622ead990a574bd531048c9fe80f6b7c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "12055f1adbfb791eebbc0078fe8e5858" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ae6ac334b031863772956ded167529c6" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "ae02657a6091f73cee639fad0a51c79c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2cfacefc2fa03ee433b1f6cefc838b4c" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ef213324fb1862847cc91132225feb63" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f9c0976e25f3666e26528100ac3b1692" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "05237e2904fb62b138e1e6ea5c3496cd" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d309b28471e4124e03531f6806631ce5" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.q_weight", "shape": [ 512, 32000 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "ede184766099ed6415755f6629949e57" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32071680, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 1048576 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 6684672 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 29229056 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32047104 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32055296 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32063488 } ], "md5sum": "89d5e83b4da202f7bec532285ca1e80e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 8192000, "records": [ { "name": "lm_head.q_scale", "shape": [ 128, 32000 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 } ], "md5sum": "3503f06e614550cba195b922d158f64c" } ] }