{ "metadata": { "ParamSize": 709, "ParamBytes": 18431289344.0, "BitsPerParam": 3.2800260261877354 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "85eeee228b7acf1d3573d414cca6a4fa" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "dfe53916f79c91004f856defab88533f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a90808a40fe3ed3c0d9a14b17d1898c2" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "28e3386c8080abc7f883f18899502951" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "f113a43b7a19a3131d97b0be7e26f57b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "40ccdfbe7a5c19e32d5e104a2a4ba983" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "22fceacc6617d230e6f843a148b655fd" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26583040, "records": [ { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 10240 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8857600 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26552320 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26562560 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26572800 } ], "md5sum": "636d1643c5ffd08c756f9c5e839e446c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "52acbd1c142fc2815db5035f221fb2da" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "783f1a64645548527b3aad8d0afe2ad8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "4d7efeda756e2b8a6606a5c17c5ed3b2" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "84168424aa0509cfa58723d6421c8a38" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3744ee7da8c201e1784f830958992aa1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "bdc96ce69c763fabda1af3a2d5f8f06a" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "66674296da7b5325db4a4479104320f6" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "48b40918855192038bca9592949eff2e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "05dc195d101c4303a0cfbe8221eb1119" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "700f031890fcb290c6eddb9767e19f50" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "cd8ebe07bc86bb65d50929714dbecacd" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "97f9c60f1c0b8e5ff46bb04428684a98" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "9e6b5d552594b3d34d6fcc61901e7183" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f35b2aa9926572a184a5498da2f26c65" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0bb0b07eda7a4deb31a6d8f0f6af96bd" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "5b9db5e5e08da4f537968ba2dca32d19" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "452135d5e1d360777fec4c8cf80beda2" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5d3d01b8d9d53c14459297ba6491bed1" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6f643bf8b3700d6f4cfc503dbc73aa8e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d10a0ce6346bc48699c8bf7e46c73a86" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "79607a453cf36cc2d2ae64ddc966a6e7" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "82b851d3bc4843580512b9d605b90eec" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c9c7b95d25b4fa0c8610e51194b22879" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "ecfb53a2b74163222988f60730564fef" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b6f2c5bac294e41c812e8ee1f5d72f2e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "26a1a14f830cef6c966734b857d65945" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b7231ada028cea01dbc3bdea79fcb0d7" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "36a67c4f92a1b123faf938f54c1d753c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "20e8c3bcaa48e5279f4e0c81643a288b" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "484f68515afd2a8ed4d16c38a2ddd552" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "0bfeee1ed855b9cca07ccd7f206d9dcf" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "385b52a7dc908340003b2533bbb00d08" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "04790d1ddc9245ae4500f109823e7614" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "8fd2ce6f06f5d8db2a9cc8cede4086ce" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f5b3047735b3f77e473f933ba90e8354" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8147a0668271de2d7f2639b7bf9e9e44" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "9889cb559a97df0b7d92bd6edda1ba5c" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8d5654d5014a9119940dd3c1dd9a2e15" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "7931e8168faf6f9a16c30adb49c38fd3" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2c92a48e686c3803c5c5469446862c3f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "456c2fa585e684bc801201f29dbb1250" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "ef4f405667fca151db355ad8511ff4bc" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c6409b2d9c564b0ef9c18135a3ee57a7" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7a80e3456dddf1724a452ac04000259b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "20efebf5931d2a646624557ea6d44070" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "66ee978e4f744a69b986e7a384e22ce4" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "bf91e788b1e51294a567ee0bf8ff1de8" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32495616, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23613440 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 23623680 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32471040 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32481280 } ], "md5sum": "f45bab583b9c319edbea51303aac1408" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fefcdccb814f8a6da64ba249949c26de" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b1388d3dee93947029bb5fd016a12dee" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "0a451786ed91c8f545339c5e0ee59163" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "79592e47cfd115b14360dc7c6c253b41" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 28215296, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 25907200 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 25921536 } ], "md5sum": "d7a55f1e2e7427c8a7ddcebdab354e53" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0c4bb8eeb5db94cdfc479051e64cc543" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ef64a97791cd8d8270f150fe179e5943" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "50bc74bc4f9abd8a6baff46fefa2a891" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "43d291f90d056acf085e50ef8d1b060a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d41f3bf8fc5c27ef8baeb1c18225189c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f915ebb44b8d2e5774ada7c19332b6a4" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "92a290b59585a24cf61d24fc3442fcc3" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "1a7637fdbc7665f21b252c8ca1d134c4" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "645451a6533122d0a5ed88f55a4ab375" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "230a1292dd846bcbb9385a7e75408ee0" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3d69055690987f100c2a6b138d1ad630" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c9ef1c0d2b9155f5c8b47fefe7627ebe" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "ed46cfc5fc0bee9b456f0a1651d91ba9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "29c8284a5664d79490aa01f1a997444b" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "05367f15633879b94d4f9d18885f6089" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "5fd49d39ae58132d15e5bf08778ddbb8" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1536b285a863d2383e7c17285dca4912" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a9e68a7704444c42585fbe330e392acf" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "fe35cb7c76ff5f04e465f6a0712a8b41" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3ef91a00d81bf95bb9536989cdd18a56" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "407702f552cd1a08e37c5e05a5eb02bd" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "50500ca1e1a84e72508669f940f0059f" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dbcbd85f0334606e02c31bfe36a509b6" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "b733756763aade2d0fd6eaa7dfc874e0" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d69b81cd89c5cc73c3e18aa1c3191cda" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ea5a002a35f0bfcd0daed351e96d1636" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c8404f2aaa61ddfe14bed73167c4786b" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b168102bf74a8f92a720520e4141e3fe" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "24a1bb0e1c08c5661162e45e1152bcf4" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "093389255b5b2478bdab4e79c4798d8b" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "30d3ffcbecfe0ed099d72d5fbb9d569a" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c665abca93b69686af4b093938f95d87" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "fcc7310d8723760bee0aba6512d77653" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "440c1aa1847a33415cde46727aa7a29c" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ff6143df6b3577854a032aca19ee13a1" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ec405a2f35120db60f2828bf44295f35" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "44533c2d66c5b56b2b70d65b3e77bf16" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8bbd9ded38e23907b6dd44ae2794e19b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8e9d7448f76b69827bf5e535be74489c" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "ef1b9828c8d9ddc18a8b1cbae9f00af7" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "46d8db0a1f7339370949d470ac3ae027" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8a8c6fe96908be697533eb597b21a892" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "d61a5847373bf85aeecfb353ab25cc7d" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c888de72f0ee54b8b2676bc61fd03938" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2ea0e88b40d1162eb6217d2cd4ea27a8" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "3680b0b5a5d1c899bbc45d35e36202cb" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3e85fd828e1dffb045f9376320a21e9a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "2424725e51c38699badaa04c37f68f8d" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "55864e3c2228f1479cc34434a00cd9cb" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0bf8dd1019650a45d734e1cffb882b28" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "6a44ec6dd8d585a0f5a4eca63d87e868" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "340fe96ce2fdab1c557ebae93453abed" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "bbcff15c2e7d67c88b3b481a0e346a06" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "302673dc86afc6021293f3708967994f" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8ac421d4126e619f6abc96f37516a474" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "941ea30b2ad24f70028d85db9b5e7501" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6c05a907ff8f1513d430de73f0b43154" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "085cb3b87dbb134841c0e973ebbba8ea" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cb5ba6d655f0857acd083b27bc61253a" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4b6a09dff649fe19fb55285152e45145" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "1546ef219b395101199bab076fb21878" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3f698c87e688258da14da6523c6e1d5c" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f77b1adbbe045c95eee7efb63cf12785" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "345c535ca37913c72750967416de3da4" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "35a773d6a5d6a8760b9df27645effbee" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "040100c6b53e63dd38f45377b6cc7020" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "ee9587811c73551e28e41f71097cedf6" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4ee0ed533993b9a998c38adfcd285483" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d992bab848969ca6c14501635905bca7" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "198c3af06aab33d72cdbac8fac1d1727" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "92463a89855d23ab2272d12e36084a6a" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "01d2bfa6a62a0efdbc50ee10ef9db9d2" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "846bd82df5cd8d7f2df5cfb1ba0f1807" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7916a8d920dd7d82eb95c6926cc2433d" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "09937f56fb49648c50213ae254837f52" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "25e86594abf5b73e77dd7d60748f38c2" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ba6ffb3169ded873674b8c9f72c6ba4d" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d062fd2416632ba0ca47bf0754a9687a" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "93861e30f2f154bfbf25949f98449a39" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "edca9c989f62df96325dac33d2eecde9" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "baed023ad3a41b837368700e18ce56fa" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "b2bc910035146b5c6053b46e9c952324" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1bd103c82c7161b088b91ae61b8faceb" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "211f9c8aec9e21282704b5c338ba34fe" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "021c5d979694959396b4b938be6d09bf" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "416a38e4c55c6d911c3671f3b90218e8" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c6cec2dc3608d689acd2c60be3d7321a" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "4d55def659bd1deaa89e67ce8fe480fc" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "026b928754b6f281c966a4afb24c5ec2" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "21949021bde1394eed48e792a68c9bc9" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "06ea052a98f16b863c6a3a22680ce176" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "98644b298b7b2db0f82a18fe2120c8ae" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "694e57be31cc3c8421e3a69322a25cf6" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "f530217ee5412f7e7c62cd5e1f1c0b5c" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0b9c76ebe23087031b6c72fd22af15ad" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "03ff9cf405fa287dd4ed037bb80d65bb" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0d4b6cd3fd8f65afcdb243e45177edb6" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "48309a79083cfacd6130ea4957679f36" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "156f01c777032928fa825aeb600a4e36" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b4591a9d6b121278ee3accd6ca1a8e3f" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "ab12c81681a4bb1963990d3bc85f57c7" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c356a01c6864fae29f1fd012f1c4f817" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b090a38249a8088db3291c7fbd99b248" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "4eff0f365249f5ae6ed486c1af6eb28b" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "59d21afa968bbab76f47e0bf27b4e129" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "798177e031be9bd9478e805b2a933788" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 33110016, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14745600 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14759936 } ], "md5sum": "01f0907b69a65d2c79de8a1183d96375" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "04f6e41018990e5052ee61518578413b" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b213a0551376618cb782744e57437759" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "1f7ba37c305ce969087a474fe7ea9e43" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "127ee7635a72edef08396cf030294e80" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "20d065e36c0919de51859a7195b4cf17" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "f17a2be14927274f3b54554028e98cbd" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bd3666d592e9012ba122902461010202" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e53517de1ea9cf79705cbbd29ae72807" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e18c8725e45895c512e62d1fd1313891" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "407c7f98dc7eef57b8077dcc5fe05957" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "1327775a06a7fb81cbe43b08cdfe5d5c" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3ea9b55bc162ddab9f350d6237fabe71" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "afbc3b0e4721d1fbbe8e87104a4aa729" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "580d3d7757a1d8b154d1e1ad5e5ab8cc" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "db64eb3a944831be487fa50c3fd20679" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "412921d0d93e46e5246b090360793a69" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eed940d2e829b0418ce444564dd582bb" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3eb9dfc0b1475f21cda3152e414a385c" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "44f5a92ff129b6aded3ea276c79cc4f4" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "37288aadeebd00308efd7e36b59c11e4" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "71e2a1444447caa599d472f1e1bebdd5" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "605200a32fd0377389d4e1d057e54e3c" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3de6b39c115544abb723f51e131e6055" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "8f06f9e738d561b21abebb000ee6eb67" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "41d10770185340f9a5524f7f00efcb1d" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d04809513be9424ee5f3f81a24c0f562" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "c6db86a469fcc7251a0683d742a11da7" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "826f51f4bedcebc145dc0d20c0167bb5" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "034b987573e4efa5ce460dda9368cc80" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "2b947c28c0781045c3c0ad98a9d812e3" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "be5bebad52a72d537777cd47c4124515" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5fd679c0c1242831eaf1e1f8a4997652" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "8371ba26e6febe4e55cda2f57c3c552c" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "db0fc56f86760013f77fdab203d7d72b" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "c1e07d69c511dfe62f6dea877f165581" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c26f9512cb7f47921e0e250a46422949" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a7c9330d96981535644f861385f2f228" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e6507cc50b49f17cb0712aa6ade6b3ae" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0c902f40cfbd73dc410376dcd9868ddd" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "6c83cb4b44ae913f963c0309dc9dcf24" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b75a635aba39f4dceba49c18b4dd811c" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c4306b0ad5a566ac9562bf7455d8e4fe" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b6032401d675be4f5bb041a5b3cd0daf" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ce6c966f0545af437429718f8b64cdcb" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "81aaebd77d6fcbd28cacb229b664d92e" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "6d62429ad7fb15be83fce0c0967c07e2" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9863264e0d8c9cbdbad4daed0b68bdff" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "dd26bee0b2ee85bb9929bc70b588fb11" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9f611e6343ab86efb98113753f1642f3" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f456eab162db784502b19145563b7998" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "dcd96dbe92f948e1bfeb4d7848e1266b" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ba0352a2292e4173491e51bbae330d5a" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8195f9a68e664e0b3f8fadcd08d63e11" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "d2ed40bf7c7954aa2516321d22838ada" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b9d6d7b25fc49f1eeea498ea07c9f1e4" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2b8273a17a28c7647c278c773c5a2699" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "a4ef78b12120b96efce2c513e6073639" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "756ac7f17bf591e948749cc5fdad87cf" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "4cc8646614814768e8914834153bfd77" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "13f26544152f1c34b32e3f8c461c0df8" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ebc43e483847d6ee34773d11947c517a" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "321afed44dad91a6be3ceec4fb3359d3" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "606247ee979a0574ae9e60971e422af1" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "9f34784589280b192f7698b13e660983" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fc584e097d63536471ec07864c1225b9" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "079c7af479096d539e7b2fc08152e309" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "dd45bdf005227efd0191a8092986009b" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "21c5022db8bb4019840560c4857d42b3" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "317cd4539733db9e070b60ece0b6e1b2" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "37dd0e460e7108955b223f1e71db6b77" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "9658b781bed78fd6a1c1af21acc53525" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "12e88b2a8926d189c2c865a73e0ad1bd" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c59e75b029d2371ef2aee2ce05bf8f0a" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cf989ffb4d8fcc9250d20611d262de1b" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "b189e6770f039cc542913359de2987c0" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5c8788fb74acec05330e244d5e7a43f4" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4c8b54da4ae49bfb2fa034f1fb60b3ba" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "cc84d50dcd7c6c57064ec4670a90e717" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f9ee7aae16e1c0c6937a1d99e4572b15" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9b554543a117ceb28f4cde2b9206fe5c" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "da490a0dcfdad0df800b09aaf479ebdf" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "372d3f89e7e33cf70f958743340f89ae" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "973169553682bdf9ee3f2934081e7c2f" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f1f4ea06779f794902636f940fce4d18" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0abce49c740e60c7e57d308ff93887ec" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "d91d9d25d01d40534b69252ae937ae62" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a791493d37987bd37aa9a0aac3d5a3db" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "cc615ac6d42685b8e67ebcc8cbcfb760" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "226c0952d0ba3af1022f96e3f7142c7e" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c5cde0220354d6cc7585be5885b566fc" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "c825e827b441718203fafa790c1f38d3" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f5e951a2eac9a386ca565fea8fe977ec" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "5169c748f32b91a5af0713ea6463f906" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "16fdc07c74ffb20a42e5e85564c34d8a" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3d2e0336739f08ddbb31055a2b46150b" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "589951c3a7cab670fb07818d4c3ad58a" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b4f4c840bef5279104f06d64c449cd58" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "70ff3a5d8f67688e43ece99c4762936d" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "e73d16c54599de8adc19e7843776191a" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "85e7c46beeeee9557cc119770be89285" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b4ac1f0bf728c6f4139fdf546402aba4" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "37ab07701a3779c5c9ea7ddd1dcfa7af" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3a72bf621d32c9750f4bc18fe86bcc1b" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0ad6c110eb17e10f236e2c2bb4d424f3" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "a787ec1a4a657249861070f635fbe942" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e3a0edb3d6d72abb384b608fb0051df8" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "040cda2c007fbd175435bd4321eed5b2" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2e65f508490e5d6d0813e6e3143c7123" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f4d31f4d710e8a53d5134cd4f14b2ef0" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "61b549108552ec735beebd8038f09457" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0aa4a6d53530d2e86713b507b2fd8ab7" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "dfb96902de296f8908064e76fdda82bf" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d1135638981f95ca80d9584073a1380d" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a5787499087d639928ce44207b7d46ec" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "48464a8eba3331f5074a667d9ed08f4f" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5aeb35f9d93c85d401cd9ec3b69f1cea" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "507d68326febebf9ac15cdd83e9cdb6a" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3ae35714e3ba586740b99ca57f52b766" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "807d2bf041134199a2ff6be7da39f0f6" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 32440320 } ], "md5sum": "dc6eae71f505301aacb6fef2075ca821" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b63ba91e8391691d72a507450ac7a880" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "99d11dca6ac45e18a8a9db47c5450b66" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 25917440, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 17039360 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 17049600 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25896960 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25907200 } ], "md5sum": "88d257879d91136139a9618f18f7bced" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "32d530daa9b5ae0173f78045077902bb" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "80849a7065311fa72aeff65379a17d79" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 28860416, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 8847360 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 26542080 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 26552320 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 26566656 } ], "md5sum": "2ef89df0a695e4b78c50d57cd44f4e1e" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cd743d35ea2afeb28f3443a677d2e061" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "74b0168e9af68ebba083f09686dd0f4d" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "b5b0e3055c38cd784869613cff7637ea" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a98604dbcaa2e1034cc86496ce016fda" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "d42f8f4ab6232929ae90b958f4fa25f6" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "362064a5d71a035f2114f11dd95f3db0" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "166dcc956f5582adf31fb00d48fd4de7" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "e73eea9b4258f788f24403eb3baeb9f2" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "79c6d60a8376b091d5a3581cc8dc2adf" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "f092a176420452cb12f86d306190a3f9" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "34bf9fc03eea17df4c318fd52ac9a4ca" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1c2885467e1e33ed0fbd9dbdfe14daf2" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 17694720, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 0 } ], "md5sum": "a38c36e077eb1318f9842059c76878fc" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "befe7519130998f9ffdb34a386c487bc" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 25921536, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 14755840 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 23603200 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 23613440 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 23627776 } ], "md5sum": "dd30a16d3458286c3b66494b3def1734" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33110016, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 14745600 }, { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14759936 } ], "md5sum": "3dac58ee2b1ae2acfe865ccc615342dd" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 17039360, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 2293760 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 15400960 } ], "md5sum": "abfdfd40d2173aea2474cb20f863f116" } ] }