{ "metadata": { "ParamSize": 325, "ParamBytes": 4073875456.0, "BitsPerParam": 4.500422791965425 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65544192, "records": [ { "name": "lm_head.q_weight", "shape": [ 32004, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65544192, "byteOffset": 0 } ], "md5sum": "3f5b54c3485156589749d23192de31ce" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "81ca36f076792afaf69d3fedffc39b08" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f0e6d7788a55672dd5172066bd375a32" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d65abe2ab3628efcbe1192bc7c7d5082" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "479dd1fb58f97885e12acb9fba0d0459" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 30245888, "records": [ { "name": "lm_head.q_scale", "shape": [ 32004, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8193024, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8193024 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8201216 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11871232 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19211264 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19219456 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19227648 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22897664 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30237696 } ], "md5sum": "a2432fdb129e0336d6a7c1f85aac9550" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d850a7cf3c7b0c35184e94b11f629d75" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "037effb370c069a26bc4af68c4e00d70" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "d512478c05886d853391a4a1ecd7af91" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cc5236df3e2f0195b87973f0ab07c17c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "bf28482b789f828d5d14468ef58c7fd4" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "72d869813229aeb5530c9121fc2f6a73" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "4f6a9a3ab2f81b1255865f2e40832505" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "43f023400fac62defc59aebcbd313c05" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "654f41352fa6c3b8c62dc673824cb0b7" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "921f2bf1f873e98761a9848304b66b5d" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "53b72e1e04d461d654479aaba43d4716" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cfed542fb929f0043854a12976df6372" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "2def78561343d3efa2575f5b152bb1c2" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7479ebb6f2a22c86dd862380d41d83df" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5ff03fa4184f0c3e4e749a2a3287f444" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "ad0709340fe36b1d9fb5c44d938d336b" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "346ab47fe97532b0e4b271a582d32bf1" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "9feaf2c4fe3244dec0065b24dc4e4205" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "af626771041f4cf1fbc6029465dc7955" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "7298f146f6a279d02b1803269713756d" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b0dfa366e6e2d2a67fb6f48edb9f216" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e5fadffca8cde868acbb4b7bcd1e9dd8" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "740167c0278d3a214d59f5b9f71043db" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8fc8a8b561936d30825f4e43c71be6f4" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "45d6a2fb885a9792196963736600cfe3" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "d86b2a04c003b2994cecb547a1bbf395" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 65544192, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32004, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65544192, "byteOffset": 0 } ], "md5sum": "4a99d8236bd15ead42612a9ad9d72198" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4aa06aeca0e37d29ba88ef0343bdf850" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31802368, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32004, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8193024, "byteOffset": 23601152 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31794176 } ], "md5sum": "b6b359721b4eb28bcf83132c5a766c18" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "baf9782e0c2f90e0c0b51a58a6aaaaa1" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "51ac7444faddb4cc7189000e660008b5" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d8c6611fed6c8de7be5feda09d8d26cf" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "286e60c51e848cd262a438500620727a" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "072ffc6545f08275e83072431e7f1460" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1b1d90bd0398320a77092cdf4eef50df" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32505856, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11010048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 18350080 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 30932992 } ], "md5sum": "d99735133e22ed99dec2b062aa861bcb" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a7b486e9f5b3e6fe95fd5509d0a06dd2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "14f8913ad049b187ee5119602fd97831" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "79a3f91aba49ee4ecb7ace10987e83db" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1f22a45a5470b6ebbadffe640799dea9" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e2699f327913c8dd48fd41dc0a97f750" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "b98169088010f013d258cb7c20f5e360" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "75e9c9bb867a154576fc25757d45d20d" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "07f9a8ea3f5c56f0894d6a873b0fe610" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "033f70bbe0c3846b180d6aed15b89132" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "046a702b974c68e887a91d6ef102700e" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "38ada6512ea91774b7b2bfe71415329a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d9548541da5596540dae7ff37e2211cb" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "965a00e8ae6916a9f3ea037c31b3503f" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "87bad169f447f4f745f3c8c299e4745e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4a353ed4b0f7bfa11ce49305a1146c3d" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "9407bcfffbe598fcf767ead76ea4b9fb" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a966b89d5fa08e1ac0bee91fa3343ffa" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cc4f6f54847dbdc7c6b6cad81b1315c4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "a789bfe709edd4b40500c2bbe3184d74" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f7544b6a8b37df2d97a47d71ea03dd74" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "22d4db72b88da349214fdfbe1c824a31" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "eb9c0f1cf848627d82df1b249af38476" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fff6f9286617c0c57afdabd4f89c7c26" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "d37ced2416d838b386b40fa2ca4006e2" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9a7385e3d8f76d4861c99313fb71221b" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "32670946b1a3735e0fe5bb683addd163" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "30a30dd4ea86ed5636d0d16de384fe33" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3a7acdf7b8db5d3c84e42a401263e67f" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "65c43546cfb6a2b37395a3c64395c0e0" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13115392 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13123584 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13131776 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 16801792 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24141824 } ], "md5sum": "31c8aff71ba71302a4a2d6d9a02b83fc" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4c32f6fe6095c961b4d6e31723f0b316" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "93a51534c0eac4c57689f3feb748a46e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "d5fa7be5537d3a17841fc52102354cde" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6fef64053b23a8d17290a9e9336d0ba4" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "5923de5eddd2f079914b74f699699820" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8cce659f3bb4bcb87fe841e584f46d67" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "77af592e021fc90d44782eb79894ef92" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f5dbf3fd0f1d7f0e81e47eb0f4254e8d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "eb642616c7e1a370110c809705c53c4a" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "4127c3c7c4cba435ab1d3811162c7f17" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f05f757731a17773a9abcfffb09c0037" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c0728aaabcce9687f2310c0ae0687d81" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "94fd970fd39965b46c28c8cd83407a2a" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "63ce8be8ec64b847717f8e252722cd5f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "53cc16cf9636b55e03bf9e5c3b56451e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "a09db55e7a2bb6e47797e736809fab43" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "aa03582a3a754b69a62045dc8812390a" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "9e10398989e176b7266a8293bf5c5d04" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ff794d403269b38e993b1ad23c87b327" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "330fcf2c930c7dc9958ad410e3eacd1b" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9317317f9abed71317bd9ca740c3442a" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7148e14278d7376077d4a5e8b5f2308e" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "54ae4abd6d0773123602d8fe5e8478d1" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d6fa32059b4dfd5122011c5898e6b802" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5ab1570e177042f06b091d50c0da3f7d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "1b5269938e706927abd1b664f6383f91" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0907652449bb3672a447319e3fe6fbca" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0c488c7bd52998e0ff74ba38a9d97190" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "6e5702366775ce1c51ba2397a5cf0025" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "00f0339be0e3074daf6ccbf8f064a716" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "861ef968cc9d434018c6742dc2515747" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c94d5f04123f7511595986aabe21e1c6" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "f02ddb27ed92e8012b40f6a1a71d4f18" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 23592960 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31981568 } ], "md5sum": "d5790a1e7b02c1c3353c533c97b6a3dd" } ] }