diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,7447 @@ +{ + "metadata": { + "ParamSize": 485, + "ParamBytes": 21092663296.0, + "BitsPerParam": 5.000635812792825 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32000, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "f5a86c938b5288e20892760971456f7b" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "a7a59015b87059f1efb0617cd205b573" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "cd7b08a28b39ada477dd473e567a5350" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 27705344, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32000, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 16384000 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 16400384 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 27672576 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 27688960 + } + ], + "md5sum": "75e5c2791fcd05ffa12afc355e5b251b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "73876dee5e70b853678f956e4a03f118" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "77d4ea5d8db10a5cd7093ef0b74a6609" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "16cf358b0a0fca3aac5ac45d17b2b63a" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a55cccb38fffb04c6042854389f695e4" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "3396009e1b6f8e2ddc9a40b98cdee0d4" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "49293a617e0920d150d046c1ed2362be" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "8b2feb8b752be87e218138256f3cb6a2" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0365a28afc5a6a7b515a9577bb0dfe5e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "55517478f12ee7dfb930d27800d456b8" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "757161aa414bd79d71d1e8f281da2ab1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "7f6075a48b8c4ad4988574322f117252" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "48ddc46af4706d825b1b2479e97e58d3" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "9886eb6c8e9aba55b2eb442d3a2e8cfe" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "12fdaee7e7c76120296f0c8d687036cd" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9fe8d8e85571b298ae86a064e2467157" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "e0603619c2f73e7a98ab56b659a1dc90" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "91df9b3fc60f48953d8ed27cff096b7d" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "bd51fe9486ded1e99c0c51d1eb513265" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "eb93480c914dc22b71221b337d6bc107" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "98e0ff846eb77b9b743110b8ef2932db" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0998ba599df454a4e9f40595bb59a17d" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "8c68b6aa632b709201714c6bb2e6b5f5" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "fa91d310d28220dadf370c8e0dfb95f0" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "adb2e1b6757aedc0a9662adab6a781c5" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f85c26efe47fb4f08a653d72bfffbff5" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "a6019f4216851655bdf2a05250278f43" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ffd646882a387bdf2bed0e5773395923" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "a40fe457a4bba633b971b84ff09f9104" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "dc67573d10efaa78f0b4c2a2a0ad7a43" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1853a8aeeebaaab84c2a5b708cfe2495" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d5704e9821694c73e1bfed65f4f47826" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "616c6a9be9a2be47f865b3eb489a0fd6" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 131072000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131072000, + "byteOffset": 0 + } + ], + "md5sum": "b478cb7f6343a16d773c1b911d893f53" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "811e9771e65591a56488241548ecb523" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "a1700afaa85d0a03c798bf4c46ef76b9" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "6c0732798060f31e2ad2d63a564f54a1" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "3fce92efc4efddde311e5ddc97f30fba" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f571ccc6e21210189bc78fed238e377d" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1eb6cc65993607064266d1a58edfa622" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 32931840, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384000, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 16384000 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 16400384 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 27672576 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 27688960 + } + ], + "md5sum": "55401ff098b5c841a856be755b017486" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "7e102f04bd37343d55bfe34c934d039b" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "368ef6c2bf099e8574516d6c55441301" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "4ffad3c752472c53c1e15a0ce9a647c0" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aa91c4e3546ad5dc51c711a15f07d54e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "91c1cd202d84837f9000e94e83fb0331" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "788ade7e2d59a828688e54247536225e" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 24952832, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 4210688 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 15482880 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 15499264 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 20742144 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 24936448 + } + ], + "md5sum": "0c6f4bc91a9714f89e41caa19f89bde4" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "9d40faaa9885cdc2ad924dd123fa1d77" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2cb508080821e9c01cce34b354fba070" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9464d60616204f0632377f5e59a8d481" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "888d375887159dc831ca71d26f65cc9a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "a1432a444d436da3848553bf24b888f9" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "e51b48672a54bcee78ced8b24636c404" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "28980d6a0ac203305ce8394fd9d121f0" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aec5328b00730d9f4bef66f69ba28e63" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "4cf2a3a6a43e701bb3000c16bbf66bb6" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ccfdd50b0c3813da687403bd4d21d947" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "8a8c3fa3407efc9d8e329a47da0a234a" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "1a92d598f3b87c6f9b2f73e541bd5ef6" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "ba676dc819754dbf376d55aa1396c4f9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ff08bb0778276a6ab8998204feeda7ed" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d13000a03ff4ea74096887d43593cc4e" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "07cd7d22602e4c7f85a18c6735974838" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "bd326e8929fa927d108427d9cbba3b36" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "1ccf85633a2e48e2c84c306c03cf4bd2" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7927f0f2d8ec483a7d20379fcb252a19" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a059aeec62e31df3e379a3139e7fbaa6" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6a5ce7ae181620ca0bdb3558d52cd8a7" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "e1853ca44144248b9e0a93e99d5a90fe" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0678b13d30fc07afc89dcfbc9559e377" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "204be4e66ecb402e24847de7d707430a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b54232e3f6c7d69747c617452a4ffd19" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "d01284fda12172b47e18319048ee6194" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 30179328, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20725760 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25968640 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30162944 + } + ], + "md5sum": "cf5e22ed474d6d579ce68f048b068fc6" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "ead6ef1064ac56ef4ed4b2cd3cacb2a6" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "00f9eef2a77d7cc0831384d03966a94a" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "669d7d8907fe623b17ce2ae940dc1cb0" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "083456f46bd212164a6f923bf15ba310" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "426599f8b107fbda7b77f46b1c9cd977" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "aaf6e9f5eef56638f3c84355b11c7020" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "22397b49b118f2378b0db8ff655d4e1a" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8234c72ae837c5c4a9b6e23681ef687e" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "3be0978fd3503720e3801c177c8c536e" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7a221865c6071e68620ace4948c96f35" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "cf4f8ee7ed8db6060626b34b87853780" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "fd0e240501fecfbd76dfa46a0b07dc22" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "38fad15857ffa6a7b074aa887e3634e1" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9e968161390387212dc692e69bd28c7c" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "25d3bf6aaf24b6b7e052d37f54a47190" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "336726be4dba4bae5b89d69f124446fd" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "81862342b91c04fdae8d7f9647ef83ca" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6670e94aa3e41aa822e077f7cb1914c2" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 30179328, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + } + ], + "md5sum": "7ce7cde30725471d307f62a017dc0961" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "06d31c3396959127e431a133178402a0" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "155cad885d58e70ad8f379781953be90" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "ab59c66c3a60bf498407d6b6a0741578" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "fbba9c48990ada0e32743c216f21feb4" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5d8cd940e67ce63931ee8adf2a16ecb9" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d53a3f34715660d6ae6d7a34bfc30eb8" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 32047104, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20742144 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20758528 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32030720 + } + ], + "md5sum": "820583abccc950576b65d2c0829fa8df" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e798cdceacd2180a724e3dd57324dada" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "44be46cdbd9fbc3afa2269130bde5a89" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "13e43741ec8f0bd5ababe287ab6f2222" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "74ae5ab9ee09a698332aaefea079f018" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8917ef4b7d63febab7b2edda41f9b5bb" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e394ad9cd3886078eb6eb76e3b208196" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "c7cb91f8d0711a3d82525a4198f0017e" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "9a106dbb585f440eefd650a30c92d41e" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "60314be432cbbceb2b26beb7c4c12bc5" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2e20cf90ea344adf91b73da4792496f6" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9735325774c479db3284f614c0b5a2f3" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a4ba04b17ed13bb749d64e3d0e1dc2e1" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "ea458dd8e0508f388273eee866c51c1c" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "b8f27759eea7866bbe715b9186d4a0f9" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 32047104, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32030720 + } + ], + "md5sum": "2dc7132af456d38537a6bb99a70ab6c5" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "e7a6e93a328238392bc8ca2d7eec7767" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1e02c487b497570e3d57df01c2d47d74" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0edfec015f54dea4104944ad420eb41e" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fbb6f094071e1cd79947759598e186e4" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "c7d7e5e7750af89c731668a3f233df81" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "69fc94751f5275a05eda8fa8dcb16f07" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "73c657f6372ddea167f41d276226df7f" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1b9c3588ebff4cfbb11671691755c5d9" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "d9e0e6969e54de148f2636986c8b0b09" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6e9b913bb6c330d452d17bd04da7ec6c" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "73a678dc082427bea815fd4855b60332" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "6d575b2d5f385bac1f33c37cd7113bfd" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "847817c9aa14d79cc94995df7828f36d" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ac37587cd5916205dec6a510c1968c23" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dda4e2c28704135a1931f12a06627328" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "664544842533d7b57e4ef145a3f5b7eb" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "829ac06d5853be2e1d6c44a851041e31" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "3feff921b6050c51b44ff8f3e350f68c" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b89c6f738ce0b2dd9a8c37475f5fa975" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "306dfb4344b7171cb43fb859886996ba" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "67bc23715f3c1766f4e4bae3fffcb181" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "ec460719cdda88a5c16bd924b0b311f8" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "52e4cbcc40bd03ad0ecda0693fe6a35e" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6fd2721ad889eee8b4c78765186c90ea" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ec76890f6aa9fe84459d425cba8f935b" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "8f823c049586cea34cef8c987191c8f4" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "71f2f33634f3a8a04dc88e65beca472d" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "64a6e5c66b7d9e7f007a9f0f8cd954d5" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "b9f5799cba9ddd3ae6cefcae3f66433a" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "76cd50f10918da68dccd349afc3e9416" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dffd84d23bec7c2c1051595a1078b26a" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d543ab39ad7827d895184f5488ed2550" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "8de48d97b570ed295217835613466611" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b0af68e8deefbe1a428ac36ac7ebdbd5" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a93aa31fc72f862f0b31afacb5958625" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 30179328, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + } + ], + "md5sum": "4908d031091a8bdf080d2eba1b81608b" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "67dc2852015061262462520d0eacc425" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "b2922c0c6dc32b58fe8aaa8d55f991aa" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "cbd7a0bf7deaba1bf3f28dfe58b4d17f" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "f1e0b592ef0178cf6f27b5c0ccf8620a" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "95ad51a607abef0d5f4495b531ec2ba0" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aff45761a838c0ef241c092cc7f7869c" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 32047104, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20742144 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20758528 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32030720 + } + ], + "md5sum": "4a1231fdcb432d7d806bcf833fe9b152" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1fb2d8d5fe95926b486be5e52ccb88e9" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "ec1d225d4a202dbcf21c3ff71f5bfbb9" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "3b10e134d837f0685e75f98238f5ff60" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "706ff469a5b0e36dcd7475692606b470" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ea351df7563380e19483d6213d1fb8e9" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c952885c073fad17cf28540d38124a81" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "72449bdbb5fe8df43bf9892e425cd9c7" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "f99b50c619b90399ff41e7062d8e95f3" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "5fbbb8b0f953f01862d438d0b87b80cb" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f5fe3c7e858f5fc597576fa53dade3d0" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4de2da4d8a9ce5f7e9d13a0e81e64815" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "10ce41809dd06c549cfb8307b48224c6" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "aac2d673168257efc1c674f5804208f9" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "167923f7d9de4621235fb282823e5fde" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "d8c0f7b1e22e22ac720ae73e0732e9b5" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "251e36a58cd77cf0c4477d6b86b2c7b6" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "e392fd762633eefd3688aa009928e897" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1c3e40ee13dd7e91b59443fc62e5a121" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "78316b0ff4f45ca14ea6ce2bcf9b1c91" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "e090320334b2356e11a0a5eae7dd071d" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e2a74ca8a4e6903761bfb9c7deee867a" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "001997a877db3e1b46ecf9bb7d0f03d1" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3f1619cbae6fe078df0e79d7315d5f6d" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "2dcdf917152fd40fc77f1bb258b4c5ab" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "408821a27f2683354a5abb8c06b7cd4c" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "37da29920fd88b414009c5a4776bee23" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b4e1ae27bef0a99a591b02fa0b2f2b93" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "49dad54c977308713252bfcc018e192f" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f7de981a0cfe3a3fef7387803fb70146" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "437e87ad8222576bd886b5a7fdae3b95" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "75868ff60be00c8f4b4e16fe6e62ed06" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ae553a0f893a2e2fb8d40134454ed73c" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "746ba68d8f25eea051a6b2efada2ea15" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "0623752169d0f8da3c55be450b73259a" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 30179328, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20725760 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25968640 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30162944 + } + ], + "md5sum": "42dcf4dd022a2ebda95b2a21f4ac8736" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "a62c8e694f1118e275ddfed5f82ad234" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "a5c6b73f64f83629e0f5cff52a05aa3b" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "cc58840d1ed91f6b453314cadea263a0" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2468320e2ca1a27e36d77333f17c9cde" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4dec7036c7361d20d4e23541b9d3ecba" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "4d8bd1e60368137cb2eb5d385d7421f4" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 32047104, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11288576 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 11304960 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 22577152 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 22593536 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 27836416 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32030720 + } + ], + "md5sum": "8a2275a40447b5172b7ad2dd0faa79e0" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "a0a4f7d6a9c6a96de83043ab9688a7fc" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f6b960947885e9c0ae84ccb3401da062" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5137e3d042f2d7d9f063c83eb4e469d2" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c5b5b3c41d32f5cb5e30bb315441cac8" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "4c295ca9fa51e9dccf550f456e6d66aa" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "75c7b5e57f672b70c68b402f74d796b5" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "a1a8637b84a8f668141ce7515e8b0996" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "79067cb19e6246fea44e9a999fef1db0" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "3f7158a031185eab529b7eba6d863b42" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d2bec13850f41ccac0266a148680f0be" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "3caf6a87fd2e160c33f4bdcc6b361e57" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "66ba1704ba0d7ddee6de94aae4747431" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "37a44e7909c5fa80ed5cea915d6059d3" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c534190d9bb254c0f9224bfac1c1ffb0" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a9f334c16540a23b777a94a4216a1e02" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "c1a622c960782c7e158d112ea34bd112" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "5c532fab950dca0552917f813c6858e2" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "97127155764f04f3928a7caabc0da9e3" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1e481e57754c63804f692cfc72a04381" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6ce1993381a4897e5fc03b10cf22c3a0" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "10e7f4c3cb3cb345fcf4a7e5b7e8b41d" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "68d28c6ef304d81a477714118bca64a4" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "2ec4f220947326bbbd43dd2b211e5a20" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "753fb753d34fc1539912af1ce8da4ec2" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "371ce029afda7d7982dd23dc5c34ad09" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "1467657adc7bb62a7b3560cbfd545d05" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f711b3da890ef8540e4bb3956774bb17" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "61e136d626567e16543b0455f3282385" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "768be19f8ba7b3b554c9ad9946f86180" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 31981568, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9437184 + } + ], + "md5sum": "d472681b10043a58d0e5d574f666da13" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5f880db7fb136ed3ca199a4d8537f3de" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "f664780bb7911e40d0b29f51433574a0" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "b01f26fea51abcc632d027d9e88353d0" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "0248e2ba8f635ef7f72ea3f64e519abd" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "e92092cdfbfdbe420898587cd2984f47" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d588e6b0b1a752593741e21830616878" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 32047104, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20742144 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20758528 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32030720 + } + ], + "md5sum": "1b5199f746fa15798fa44909cf44d03f" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "abaaf51f5c5f6e53e392cef426c6b61f" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "a58d38f47d0af7582f0f0f7180e8408a" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "9ecdcfa7d11ef536de097270fe90b24d" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "bc97cf079996cc9c0506fab6c3484c1e" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4411aa71fa2b2058fd7e3d81f2c81623" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2432cb8eb07e22bf81bafb25d2147952" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "1b66a94c6f7159c50aee5732fbd1a6df" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "f2e08d974b9efdec88090d7dd1c46ed8" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "8109afe2f78b6221ba32741e1161fb41" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7f075d34cf7a7a10f89dc3eb34e584d8" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7995d391797920c7379045abcc05e2e9" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "36d35146238123f064434b928ee484af" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "0d7c48f12d1e2fe2ba55fb24e1bcfcdb" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "78d1ae98ab309845ff01a13357dd201b" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "79fc0ff229124ed6553762c072a75a10" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45ef61b8174ad4d5f5ce0de3a296d96c" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 32030720, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 20742144 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 32014336 + } + ], + "md5sum": "bbc9302c3109319fae50efbb80348afd" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c75a3caf8eab3e12af9cb5da3933ccf7" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "e86a0f7d4fe0bb577bf0a90222ba1d7b" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "4d9bcafa816d0083b532163cd8ba6dc4" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "324e66ceb51fc7b56aad7cdb631b073b" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5b0df4183b76a22aa627bb7a79474785" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "96b9fcdb955459f1ebc5bdd41f8be206" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2752 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "16e1791073f0543eec191f0f0f488e4e" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 30195712, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 9453568 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 20725760 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20742144 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25985024 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 30179328 + } + ], + "md5sum": "3e67bc8b01e02d44b0e6af44f6ca8d0d" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "4f7e5f0909a97c5b2bc85153f7d1b7cd" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5e75b888ad5ff662c799bec67628b319" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a74ece3c89af915b9191668e8d7fea18" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "05580f22e1ae04d2e22a6acd46f9ca17" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 180355072, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 44032, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 180355072, + "byteOffset": 0 + } + ], + "md5sum": "2223977b8dffc448801550e72adf48a6" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 44032, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1a97abee091b4f0bf8ca70dbd5a8cca3" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9a17913afff948be1521eea67b99d9a1" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2e6eea4d63571bc29c9a1602b314bd83" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 30162944, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 688 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 11272192, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 16384, + "byteOffset": 11272192 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 11288576 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 16531456 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 5242880, + "byteOffset": 20725760 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 4194304, + "byteOffset": 25968640 + } + ], + "md5sum": "28ddcface17ab3f0068f4bf00e213047" + } + ] +} \ No newline at end of file