{ "metadata": { "ParamSize": 265, "ParamBytes": 3107627008.0, "BitsPerParam": 4.50045192972882 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.q_weight", "shape": [ 512, 32000 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "a409a981ad84f817299f9e8cbca0f7e1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30744576, "records": [ { "name": "lm_head.q_scale", "shape": [ 128, 32000 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192000 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8200192 } ], "md5sum": "076fb6c403c3538299551eb2e94d7257" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6c63b625d76fffd23010b3799dd1c493" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "652ddb3e66ff2e1dc9ceab7c65af2310" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a1ab7d1ab9b43da11e2d1101159f15e9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "74409504e34478aa6ef92ac3a342a934" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7cfce233ddb66d49d5ab9a8bde57aa4a" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "0a5a23a9dfd678e7a4dae3638228af86" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "75a3d81b2e27df48f37bd595d28ea5d2" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "30c98d247944d7e0da87ae6a67640b99" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "375317c04a6996c79d674345e8c25d9a" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8f14de8ff5df1688de1e03713a7f98ce" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29253632, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 9445376 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17637376 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 17645568 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 20463616 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26099712 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 26107904 } ], "md5sum": "219d45c08879f93b4c7b06d966d566ab" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "6e8c9d40d55b79a19845b82125ff1696" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "159f066a82390b0b2d8accf17bd4c432" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "99bbd258294084f4b99c7a458f6e0304" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ff7ebec29f14488cab654862c24b7132" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "579a50232cbf45da5960e4c475798427" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "04fb8e9e6d2521e78cb68209dd5a9b6a" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "25d18fb3a20fa3ae9a8935348ea5c035" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "27eabfb41e2c68c2b619aa4edacdaaa2" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "72b4ac22b1344d13ec5f51e7158e01d5" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5903ac72a05ccce2ea1673c1208d7d7b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b795e957d464cfa9b459c86ef9ff1c5c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2a477833c673d1a040574d330f17a160" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "77be3027d7f678372b0ac28e1e37bf6e" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "9b2da97f30230ddb58da2895f2a16b15" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "84437559ad7fa36ad08155b870488c37" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fd3633fbd600573aa1f593514877f56e" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e9109d14c28e5de6e4b809a6c0f1d598" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b904f561d9231c8486d8afb7dba1f8ce" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c1ae830edd2e74c56a0a9dbbec321156" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "862abc92c4da15c5d81671317d9c70a9" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "17aab2eefad954fd45111230aa27aac3" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "e24809e68cd5bd288f87f570b63d1465" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6d6993867fd732e031063c3b6848c19c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "81f376e03d4a77a2cd58422d137e1315" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "39abf5e21b66dd341a0150896382fcd6" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1e366c5e53e8b2ec0c5867064b322d07" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "df8ed368a405cf435bb42457d4654259" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "6a55a426bb1a556d89923c95157469a7" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "e908e4beec62d629638872956bf6faad" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "465a0ea9544422afcb2b7ae4e98c3ba2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d744005b5be13899fcc140f92eac09a4" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "57a2c445676f7f0274feea615482db30" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bfa59dbf6704f699e2c2f9ed721c3817" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3e0bde7e78b0ff37495c47629f76f293" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "af24d399116c57eb8420393810e1201e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "ba596f7ee732b7dcd553b530f449a2b2" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0217d63d95ad2f3c49d2dedc7d342847" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0b3b1f7435dd5bd84d0516b09d5875c8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "e9ca5b8912540ee90225f1e26aa7d3af" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "66d29961e47bd28a70a18c81e5058973" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "472680865faad735274034efa3c89680" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "342e42d356f1f42ee4d4fc9426be2174" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "225c64fbb4880415b7b20638a012c337" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "59400872f5dab6074f3513cca2e58308" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "669c15b0f39f6e0da1adbe4a6905cb0f" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "93ffd4ef02046b54aee9cc6620e54349" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ef3e9fbf3f816684e3ff82823ed46573" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a53b2a06aaeea77986b792ff5f474190" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "0cd90333f37845eb77974346437cc7a6" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "642847c267bd2ca781a3a020ef000e03" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "06e82809cddc664b9e2be353ad9a4892" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c976aa05073380d7875ff692e1cba854" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "9ba910228deab9ccbadd6580bdfe76cf" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5ae0846eedad155b86b59c41ea9b25df" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "44827a30956235a153652f69c0e1e71e" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "e4ec6844a6ed02177668dc2e9a397c47" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "7e16e5176e62827509cf97454c6e6350" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "67b951396c9f96e590139fbb3485b77f" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d2ea6c9bc20b5d7be95877fb4d14112a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "37698956d7deb3c877f57c19e589baca" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b991c7cba8703d73463cf223287a346d" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "029a7b9adfdb28f7dd4e23306fdf0450" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "68ee4f519923d7ce2dd18980c29e171e" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "098cb73642f9f9d10af1ab17de1c3955" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4c5c6d88d7750222b36a7996dc3fdf98" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "32b805b49f17c7cac77edc2e19971e77" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2887beec5731b4142a9ba7cfd7f42bdb" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d0ba03bd66456c02ea322ea1f2043a7c" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1a208fe19eb0f09c49b6ab5c5ab226b4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "e1dc400236414860c539b39068359537" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "75951612e72418c6d50ef779b8239576" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8f9a4eccc93f160c05d04d7644252c6e" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b4b650a98ecd54c40f245bfcd8f3aea9" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a442bdd342ba58f25223f8f96373e5e3" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f33d7a81126f799e037b6ac3f30cb712" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dcfe38d9fbd1128c329fe83d0bf5a7ad" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "f4bc3c05268c93cb2a4fdbd41c726375" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "be091162cce9407364cb4c4c5bbf1878" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "337cf6d940c8939419100442b5c15a3a" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ee7555052ca33b1ef6b7d16896a9d3c9" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 21045248, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 } ], "md5sum": "67483bce3bf4a60f4dc9ec6197e511f3" } ] }