{ "metadata": { "ParamSize": 325, "ParamBytes": 3631664128.0, "BitsPerParam": 3.617978559693305 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 211365888, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 211365888, "byteOffset": 0 } ], "md5sum": "9d7cfc3cfa0201de657a4fe9e4ad6432" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "7cc3f05367adee36db57422c18b58b24" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 211365888, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 211365888, "byteOffset": 0 } ], "md5sum": "bd0817d78ce4cedb1c030489378edc6e" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 26420736, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 26420736, "byteOffset": 0 } ], "md5sum": "7275a19f5d55f404043e9e4e380007e0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "f03d82a88a503980612fd94e4c4b1672" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "79191feb5cdeb56cd0f76aa6d19afb75" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 32335360, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 26420736, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26420736 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 26428928 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29369856 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29378048 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29386240 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 29394432 } ], "md5sum": "c873766b2a18bb779b44c82cdcc72255" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "974db68a2c92be3d5543a52fb5210490" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "58829e4163a984ec306a89d54527e775" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "19d2449f4a98fb49432ef8533b39f97e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "f2a5cf39b723950d85f4ce0f84705541" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "68e16b521a112347df61eaab5e5111f9" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "95bb1edfcad82b243a6f622d1769c7d4" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "01cb5fd3a44e45347b2886b11a92add4" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "e5b1bb74a8ac20e850f1283c3a7aff13" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "0fbd3bc70af5f5098f51a6b7a0b1c6eb" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "7907aca3573a63c50aaa3d134f8fdb17" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "ed2af9dbdd81559f3ff97cd39bd265e3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "a05b15f46cc3233b3298a3754ef94f7e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "449332198867986c4836c77864809462" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "a7f73d297f1c93610f7598897a3d0485" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "3bc48f644ad401556d7b5612e98121a5" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d911eb0990a57f92e921b6240e4112c9" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "ed15446ab70e1849121a4dee5d3500f8" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "8596456e9fb015cc1fcc7bf91cc77d88" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "f41cf4629c02f7e03df0f84bcb282db3" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "b3773d02b5103d3ae9a63159be6a0817" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "43092c88f918ebbde6f9a47baa042d35" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "dbfdf8e42df279033b5fcd3d41eed344" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "ed6db02f89a401d5e4ad28cb1e26a5b9" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "4a987d08acdabed8ce58f13007284225" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "c8d77101caf0120e82663565d4cd8bb1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "e01abe4fa1818ee8670c816138d69653" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "cf9489e839a8a5832f1110120caff0c4" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "77a3eb901c249cb396747a2f656af6f8" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "0fdf70177a8a9ac51de2456631ebe8c4" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "8d5619e1937a7f3a0b98ab04e0476902" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "e7345c84009248cd5078c172ce3d58a0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "31881cf38c8064834acff3ab462c163c" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "7e2053562bbdf87e6e71fdabdae8747f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "580a5efa5c0447aa84d4d9a7b37433a3" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "3c3d309e74410b2f2edf959ad6d3ada0" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "804fabf5bdb0bafc68bc3b6aa9e44d10" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "c0846cc2e72f63ef96846ca0984c4b7c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "7c3e3fc4181e24e5dfa9ca0d4c72ac8c" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "2fe52fc7ed61352ed1f7356124607a61" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "2184e75a235f2e37a6f942ccdd07ae02" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "16ba517eb5a12d89ce24c4c0b523631d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "cc88162c630457664b0dc08c7d37201c" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "42214bc01414cefc1a1d51a7084960ac" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "956d86f81d7eda9388f99a63e080e5ed" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "0c01ec56ffd8a515bd58d8b42f1c45e7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "eb40a41fc70755fcdfba94a3f6796d77" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "dbf3f511be11a90c744efe78643a88a7" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "7b3e42059a35c78d99b3dc780b8dabd0" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "a2b4132263d30290a426226881c28ec8" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "f70a1590cad9ec80ee03ec8edb38e857" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "92a9e16ea3cd99e4a56b051e0e6517fd" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "983509dd82c311cd7c7c02feefd237a9" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "ffa72d135061bb10e4bc0be86db82c18" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "46e32ef9f9ef00d7cf644be371dff672" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "898fff3774c4328ae0d00ad1712cfc09" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30806016, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 24899584 } ], "md5sum": "b5b9db6fcdd3e119f19d2016232a9f66" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "b9c4dfa3dbcdbd390de3805cb337dcd4" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "30a4bfc51fc1ffdb6fb1963ca6d6c0b6" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "50aaa86f1c15c2884e90eb0a6d7b53ef" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "04509146413ef30caf6e87ee3677e30d" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "8f80bcca4c9c924a745f2e3de489665d" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "de34285f8ba0ae30c21a5605a86e4c08" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 30806016, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21934080 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21942272 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 21950464 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 24891392 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30797824 } ], "md5sum": "5d71bd568b0addd602b36552318b013f" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "471ddece5b870ae98f82319d39d39f5b" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "0224730db591e5bcf7c8c450d8bbbec5" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "703b75ca2a4c692a402301d3c64a3d07" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "24f52512ec1738e46db435e38e675355" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "68faaeff6039563c96022a09d2636319" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "243ffde9d0fc8820df553f57e3946cdc" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "6ed4ad0c8c60498e46ac787444213c65" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "8d7c99af82e33fca070f474e4d2b42b8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "9e950ee78b7a23315d83ca7997c488fb" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "935b5326541ec2c8fc5c4ccc3d72ab3c" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "c292b8c570f60a97ceeb3ac5942a0061" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "53bd82929c944502a80d9b4d5403f308" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "588e124739298f00cf6450c121af4349" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "76a76e1b24a3c6ede611599d2909ff5e" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "a73f91382407fb49dd7f434e416f2dbc" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "3994443e445a592d0fe0d32c99f11e29" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "410b31c6fee42f631cf250bb7bb23cce" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "2219fc0a9ce627ea9a8d1b15fb54fec6" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "75bcccecd8d2ac66eb27f723cd6067ef" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "a4e31e9b08f87eb1fa5ef38a0d5a1997" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "b0d79411d7718d8f15d1e465e6a7f3f2" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "9f22ed5af89a62f3be9bef37303303d9" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "1678b0f5960b8f7a5f92a685f828424e" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "53a91c4c4f089c986849d1a39fb5ecc9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "42a2d7e60807a0244fa2d4b547806618" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "f6a278c3cddc007626b9f4e2d9b65a50" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "d5f556bc88f77797efafcb4ace7cd6e1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "23f27239caa1f7ab10d98ea6db711ff2" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 24891392, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 18984960 } ], "md5sum": "f98ff2d7dce677a8d6d46db30483e64b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 18984960, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 } ], "md5sum": "c05b67f6a389b955356d503cae13ee4c" } ] }